Jaegeon Lee’s Master’s Thesis

Differences in Commuting Inflow Patterns among the Employment Districts in Seoul along with the Adoption of Work-from-Home during the COVID-19 Pandemic Period

R Setup

library(tidyverse)
library(DescTools)
library(kableExtra)
library(rgdal)
library(rgeos)
library(sf)
library(raster)
library(spdep)
library(tmap)
library(tmaptools)
library(cartogram)
library(viridisLite)
library(patchwork) 
library(xts)
library(data.table)
library(od)
library(viridisLite)
library(DT)
library(foreach)
library(doParallel)
library(igraph)
library(tidygraph)
library(furrr)
library(rlist)
library(ggrepel)
library(ggeffects)
library(factoextra)
library(FactoMineR)
library(lwgeom)
library(bipartite)
library(tsibble)
library(lubridate)
library(hrbrthemes)
library(sjPlot)
library(forcats)
## 문자열 깨질 때 로케일 설정 - 윈도우 
#Sys.getlocale() 
#localeToCharset() 
#
## 미국 로케일로 로케일을 변환하기 
#Sys.setlocale(category = 'LC_ALL',locale = 'english') 
#localeToCharset() 
#
## 우리나라로 로케일 변경하기 
#Sys.setlocale(category = 'LC_ALL',locale='korean') 
#localeToCharset()
#options(scipen = 100)



1 consistent administration codes

1.1 kosis code

kosis_code <- readxl::read_excel("adm_codes/통계청_행정동코드_2021년기준.xlsx")
colnames(kosis_code) <- c("adm_cd", "adm_nm")
kosis_code
:)   # A tibble: 3,792 × 2
:)      adm_cd       adm_nm        
:)      <chr>        <chr>         
:)    1 <NA>         <NA>          
:)    2 행정구역코드 행정구역명    
:)    3 11           서울특별시    
:)    4 21           부산광역시    
:)    5 22           대구광역시    
:)    6 23           인천광역시    
:)    7 24           광주광역시    
:)    8 25           대전광역시    
:)    9 26           울산광역시    
:)   10 29           세종특별자치시
:)   # ℹ 3,782 more rows
sujung <- tribble(
  ~sgg_cd, ~sgg_nm,
  "31021", "수정구"
)

kosis_sgg <- kosis_code %>%
  rename(sgg_cd = adm_cd) %>%
  rename(sgg_nm = adm_nm) %>%
  filter(!sgg_nm %in% c("수원시", "성남시", "안양시", "안산시", "고양시", "용인시")) %>%
  filter(str_length(sgg_cd) == 5) %>%
  filter(str_sub(sgg_cd, 1, 2) %in% c("11", "23", "31")) %>%
  filter(sgg_nm != "옹진군" | str_sub(sgg_cd, 1, 5) != "23320") %>%
  bind_rows(sujung) %>%
  arrange(sgg_cd)
kosis_sgg
:)   # A tibble: 76 × 2
:)      sgg_cd sgg_nm  
:)      <chr>  <chr>   
:)    1 11010  종로구  
:)    2 11020  중구    
:)    3 11030  용산구  
:)    4 11040  성동구  
:)    5 11050  광진구  
:)    6 11060  동대문구
:)    7 11070  중랑구  
:)    8 11080  성북구  
:)    9 11090  강북구  
:)   10 11100  도봉구  
:)   # ℹ 66 more rows
kosis_code <- kosis_code %>%
  filter(as.numeric(adm_cd) > 0  & str_length(adm_cd) > 5) %>%
  filter(str_sub(adm_cd, 1, 2) %in% c("11", "23", "31")) %>%
  filter(str_sub(adm_cd, 1, 5) != "23320") %>%
  mutate(sgg_cd = str_sub(adm_cd, 1, 5)) %>%
  left_join(kosis_sgg, by = "sgg_cd") %>%
  select(-sgg_cd) %>%
  mutate(adm_nm = str_c(sgg_nm, "_", adm_nm)) %>%
  select(adm_cd, adm_nm)
kosis_code
:)   # A tibble: 1,130 × 2
:)      adm_cd  adm_nm                
:)      <chr>   <chr>                 
:)    1 1101053 종로구_사직동         
:)    2 1101054 종로구_삼청동         
:)    3 1101055 종로구_부암동         
:)    4 1101056 종로구_평창동         
:)    5 1101057 종로구_무악동         
:)    6 1101058 종로구_교남동         
:)    7 1101060 종로구_가회동         
:)    8 1101061 종로구_종로1.2.3.4가동
:)    9 1101063 종로구_종로5.6가동    
:)   10 1101064 종로구_이화동         
:)   # ℹ 1,120 more rows
colSums(is.na(kosis_code))
:)   adm_cd adm_nm 
:)        0      0

1.2 shp code

# KOSIS에는 항동이 있지만, 생활이동데이터에는 항동이 없음
dong.sf <- sf::st_read("data_shp/shp_edited_sma_contiguous/shp_edited_sma_contiguous.shp", options = "ENCODING=euc-kr") %>%
  select(-BASE_DATE)
:)   options:        ENCODING=euc-kr 
:)   Reading layer `shp_edited_sma_contiguous' from data source `C:\Users\lejae\OneDrive\바탕 화면\thesis_최종\thesis_analysis_분석\data_shp\shp_edited_sma_contiguous\shp_edited_sma_contiguous.shp' using driver `ESRI Shapefile'
:)   Simple feature collection with 1124 features and 3 fields
:)   Geometry type: MULTIPOLYGON
:)   Dimension:     XY
:)   Bounding box:  xmin: 865000 ymin: 1880000 xmax: 1030000 ymax: 2030000
:)   Projected CRS: Korea 2000 / Unified CS
colnames(dong.sf) <- c("adm_cd", "adm_nm", "geometry")

dong.sf <- dong.sf %>%                                                  
  filter(adm_nm != "옹진군" | str_sub(adm_cd, 1, 5) != "23320") %>%     # 23320이 옹진군임
  filter(as.numeric(adm_cd) > 0  & str_length(adm_cd) > 5) %>%
  filter(str_sub(adm_cd, 1, 2) %in% c("11", "23", "31")) %>%
  mutate(sgg_cd = str_sub(adm_cd, 1, 5)) %>%
  left_join(kosis_sgg, by = "sgg_cd") %>%
  select(-sgg_cd) %>%
  mutate(adm_nm = str_c(sgg_nm, "_", adm_nm)) %>%
  mutate(adm_nm = stringr::str_replace_all(adm_nm, "·", ".")) %>%
  
  mutate(adm_cd = replace(adm_cd, adm_nm == "구로구_오류2동", "1117068"),     # 생활이동데이터에 기반한 멤버십에 항동이 따로 없으므로 이 단계에서는 필수적으로 이렇게 해야 함.
         adm_cd = replace(adm_cd, adm_nm == "구로구_항동", "1117068"),
         adm_nm = replace(adm_nm, adm_nm == "구로구_항동", "구로구_오류2동")) %>%
  st_snap_to_grid(size = 0.08) %>%
  st_make_valid() %>%
  group_by(adm_nm, adm_cd) %>%
  summarise(geometry = st_union(geometry)) %>%
  ungroup() %>%
  arrange(adm_cd)
dong.sf
:)   Simple feature collection with 1123 features and 2 fields
:)   Geometry type: GEOMETRY
:)   Dimension:     XY
:)   Bounding box:  xmin: 865000 ymin: 1880000 xmax: 1030000 ymax: 2030000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 1,123 × 3
:)      adm_nm                 adm_cd                                                                                 geometry
:)      <chr>                  <chr>                                                                        <MULTIPOLYGON [m]>
:)    1 종로구_사직동          1101053 (((953554 1953336, 953555 1953320, 953556 1953307, 953557 1953295, 953558 1953281, 9...
:)    2 종로구_삼청동          1101054 (((953844 1955492, 953859 1955490, 953902 1955493, 953912 1955493, 953916 1955492, 9...
:)    3 종로구_부암동          1101055 (((952490 1956549, 952498 1956533, 952501 1956525, 952501 1956524, 952492 1956515, 9...
:)    4 종로구_평창동          1101056 (((953684 1959210, 953665 1959132, 953647 1959057, 953651 1959043, 953672 1958971, 9...
:)    5 종로구_무악동          1101057 (((952298 1953540, 952325 1953508, 952329 1953500, 952338 1953484, 952339 1953482, 9...
:)    6 종로구_교남동          1101058 (((952572 1953259, 952573 1953256, 952575 1953250, 952577 1953241, 952580 1953234, 9...
:)    7 종로구_가회동          1101060 (((954895 1954615, 954888 1954592, 954865 1954592, 954856 1954592, 954838 1954563, 9...
:)    8 종로구_종로1.2.3.4가동 1101061 (((954918 1954372, 954926 1954362, 954932 1954355, 954937 1954352, 954949 1954346, 9...
:)    9 종로구_종로5.6가동     1101063 (((956607 1953150, 956607 1953148, 956607 1953146, 956607 1953144, 956607 1953139, 9...
:)   10 종로구_이화동          1101064 (((956366 1954112, 956372 1954108, 956379 1954108, 956379 1954108, 956408 1954108, 9...
:)   # ℹ 1,113 more rows
colSums(is.na(dong.sf))
:)     adm_nm   adm_cd geometry 
:)          0        0        0

2 functional districts

2.1 binding

Refer to the .csv file below binding/cumu_tibble_yesscaling_modularity_CLK_2020_2021_01_time_111213_0720_1100_SemiModifiedExpectation.csv 2020년과 2021년 각각의 1월 평일 11:00 ~ 13:00 동안의 모든 통행 유형을 포함하는 통행 네트워크를 구축함. 위 네트워크를 바탕으로 합역적 바인딩을 수행함.

2.2 import membership information

# 생활이동데이터에는 항동이 없음
membership_info_eng <- read_csv('data_membership/membership_info_eng_2020_2021_01_time_111213_0725_1313_SemiModifiedExpectation.csv', show_col_types = FALSE) %>%
  mutate(ADM_CD = as.character(ADM_CD)) %>%
  rename(name = member_eng) %>%
  select(-ADM_NM) %>%
  rename(adm_cd = ADM_CD) %>%
  mutate(name = ifelse(name == "Myeong", "Myeongdong", name))
membership_info_eng
:)   # A tibble: 424 × 3
:)      adm_cd  member             name     
:)      <chr>   <chr>              <chr>    
:)    1 1101053 left_49_47_further Sajik    
:)    2 1101054 right_49_47        Jongno   
:)    3 1101055 left_49_47         Seongbuk 
:)    4 1101056 left_49_47         Seongbuk 
:)    5 1101057 left_49_47_further Sajik    
:)    6 1101058 left_49_47_further Sajik    
:)    7 1101060 right_49_47        Jongno   
:)    8 1101061 right_49_47        Jongno   
:)    9 1101063 left_14_9          Changshin
:)   10 1101064 right_49_47        Jongno   
:)   # ℹ 414 more rows

2.3 spatial aggregation

dong.sf_commune <- dong.sf %>%
    filter(str_sub(adm_cd, 1, 2) == "11") %>%
    st_snap_to_grid(size = 0.02) %>%
    st_make_valid() %>%
    left_join(membership_info_eng,
              by = "adm_cd") %>%
    group_by(name) %>%
    summarise(geometry = st_union(geometry)) %>%                            
    mutate(name = factor(name)) %>%
    mutate(area = sf::st_area(geometry)) %>%
    mutate(name = as.character(name)) 
dong.sf_commune
:)   Simple feature collection with 54 features and 2 fields
:)   Geometry type: GEOMETRY
:)   Dimension:     XY
:)   Bounding box:  xmin: 935000 ymin: 1940000 xmax: 972000 ymax: 1970000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 54 × 3
:)      name                                                                                           geometry      area
:)    * <chr>                                                                                     <POLYGON [m]>     [m^2]
:)    1 Anam       ((957987 1953401, 957976 1953392, 957958 1953376, 957944 1953364, 957936 1953357, 957909 ... 15774236.
:)    2 Balsan     ((941113 1947544, 941103 1947542, 941103 1947542, 941062 1947534, 941061 1947533, 941057 ...  8854094.
:)    3 Bangbae    ((954771 1940006, 954755 1939998, 954750 1939996, 954733 1939987, 954711 1939975, 954653 ... 15289435.
:)    4 Banghak    ((959860 1959717, 959857 1959710, 959854 1959705, 959858 1959697, 959852 1959692, 959853 ... 19514974.
:)    5 Bangi      ((968192 1945526, 968204 1945515, 968204 1945515, 968210 1945508, 968211 1945506, 968211 ... 10361966.
:)    6 Banpo      ((956728 1945168, 956728 1945168, 956729 1945167, 956729 1945166, 956730 1945165, 956730 ...  6799150.
:)    7 Changshin  ((956717 1952379, 956714 1952379, 956710 1952379, 956708 1952379, 956703 1952379, 956701 ...  1972479.
:)    8 Cheongdam  ((960137 1945124, 960129 1945145, 960128 1945146, 960111 1945193, 960109 1945199, 960100 ...  7369461.
:)    9 Chunghyeon ((953195 1951524, 953196 1951519, 953197 1951516, 953199 1951516, 953206 1951503, 953207 ...  2057803.
:)   10 Daechi     ((961579 1941234, 961557 1941223, 961545 1941217, 961524 1941214, 961492 1941208, 961476 ... 10038396.
:)   # ℹ 44 more rows
colSums(is.na(dong.sf_commune))
:)       name geometry     area 
:)          0        0        0

2.4 mapping: figure 1a

2.4.1 SMA

sido.sf <- dong.sf %>%
    st_snap_to_grid(size = 0.08) %>%
    st_make_valid() %>%
    mutate(sido_cd = str_sub(adm_cd, 1, 2)) %>%
    group_by(sido_cd) %>%
    summarise(geometry = st_union(geometry)) %>%                            
    mutate(sido_cd = factor(sido_cd)) %>%
    mutate(area = sf::st_area(geometry)) %>%
    mutate(sido_cd = as.character(sido_cd)) %>%
    mutate(sido_nm = sido_cd) %>%
    mutate(sido_nm = case_when(as.character(sido_cd) == "11" ~ "Seoul",
                               as.character(sido_cd) == "23" ~ "Incheon",
                               as.character(sido_cd) == "31" ~ "Kyeonggi"))
sido.sf
:)   Simple feature collection with 3 features and 3 fields
:)   Geometry type: GEOMETRY
:)   Dimension:     XY
:)   Bounding box:  xmin: 865000 ymin: 1880000 xmax: 1030000 ymax: 2030000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 3 × 4
:)     sido_cd                                                                                             geometry         area sido_nm 
:)   * <chr>                                                                                         <GEOMETRY [m]>        [m^2] <chr>   
:)   1 11      POLYGON ((949615 1938876, 949613 1938885, 949609 1938899, 949605 1938918, 949604 1938923, 949598 ...   605300078. Seoul   
:)   2 23      MULTIPOLYGON (((927779 1932133, 927771 1932132, 927770 1932133, 927770 1932133, 927766 1932141, 9...   918081359. Incheon 
:)   3 31      MULTIPOLYGON (((931105 1890395, 931117 1890393, 931119 1890392, 931125 1890391, 931129 1890390, 9... 10297762326. Kyeonggi
sido.sf_centroid <- sido.sf %>%
  st_centroid()
sido.sf_centroid %>%
  st_geometry() %>%
  as.data.frame() %>%
  mutate(geometry = as.character(geometry))
:)                                  geometry
:)   1  c(955110.28842015, 1950405.41966252)
:)   2 c(913867.034798075, 1954864.44894772)
:)   3 c(971411.321311472, 1948334.49203402)
st_geometry(sido.sf_centroid)[[2]] <- st_point(c(923867.034798075, 1950864.44894772))
st_geometry(sido.sf_centroid)[[3]] <- st_point(c(991411.321311472, 1937334.49203402))
seoul.sf <- dong.sf %>%
    st_snap_to_grid(size = 0.08) %>%
    st_make_valid() %>%
    filter(str_sub(adm_cd, 1, 2) == "11") %>% 
    summarise(geometry = st_union(geometry))
seoul.sf
:)   Simple feature collection with 1 feature and 0 fields
:)   Geometry type: POLYGON
:)   Dimension:     XY
:)   Bounding box:  xmin: 935000 ymin: 1940000 xmax: 972000 ymax: 1970000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 1 × 1
:)                                                                                         geometry
:)                                                                                    <POLYGON [m]>
:)   1 ((949615 1938876, 949613 1938885, 949609 1938899, 949605 1938918, 949604 1938923, 949598 ...

2.4.2 korea

korea.sf <- sf::st_read("data_shp/shp_edited_korea/bnd_sigungu_00_2020_2020_4Q.shp", options = "ENCODING=euc-kr") %>%
  select(-BASE_DATE)
:)   options:        ENCODING=euc-kr 
:)   Reading layer `bnd_sigungu_00_2020_2020_4Q' from data source `C:\Users\lejae\OneDrive\바탕 화면\thesis_최종\thesis_analysis_분석\data_shp\shp_edited_korea\bnd_sigungu_00_2020_2020_4Q.shp' using driver `ESRI Shapefile'
:)   Simple feature collection with 250 features and 3 fields
:)   Geometry type: MULTIPOLYGON
:)   Dimension:     XY
:)   Bounding box:  xmin: 746000 ymin: 1460000 xmax: 1390000 ymax: 2070000
:)   Projected CRS: Korea 2000 / Unified CS
korea.sf 
:)   Simple feature collection with 250 features and 2 fields
:)   Geometry type: MULTIPOLYGON
:)   Dimension:     XY
:)   Bounding box:  xmin: 746000 ymin: 1460000 xmax: 1390000 ymax: 2070000
:)   Projected CRS: Korea 2000 / Unified CS
:)   First 10 features:
:)      SIGUNGU_CD SIGUNGU_NM                       geometry
:)   1       11010     종로구 MULTIPOLYGON (((953684 1959...
:)   2       11020       중구 MULTIPOLYGON (((957890 1952...
:)   3       11030     용산구 MULTIPOLYGON (((953114 1950...
:)   4       11040     성동구 MULTIPOLYGON (((959382 1952...
:)   5       11050     광진구 MULTIPOLYGON (((964825 1952...
:)   6       11060   동대문구 MULTIPOLYGON (((961992 1956...
:)   7       11070     중랑구 MULTIPOLYGON (((965699 1957...
:)   8       11080     성북구 MULTIPOLYGON (((954470 1959...
:)   9       11090     강북구 MULTIPOLYGON (((956319 1965...
:)   10      11100     도봉구 MULTIPOLYGON (((957671 1966...



3 background EDAs

3.1 edu level by industry and occupation

3.1.1 import regional_mdis_1

regional_mdis_1 <- read_csv("data_mdis_econsurvey/regional_mdis/2021_하반기_A형_시군구_대분류_2021.csv",
                            locale=locale('ko',encoding='euc-kr'))
regional_mdis_1 %>%
  is.na() %>%
  colSums()
:)                     만연령             교육정도코드 사업체소재지행정구역코드        10차_산업분류코드         7차_직업분류코드           종사상지위코드             행정구역코드         경제활동구분코드 
:)                          0                        0                   175354                   175354                   175354                   175354                        0                        0
colnames(regional_mdis_1) <- c("age", "edu", "workplace_cd", "ind", "occup", "hierar", "resid_cd", "empstatus") 
regional_mdis_1
:)   # A tibble: 431,235 × 8
:)        age   edu workplace_cd ind   occup hierar resid_cd empstatus
:)      <dbl> <dbl>        <dbl> <chr> <dbl>  <dbl>    <dbl>     <dbl>
:)    1   100     0           NA <NA>     NA     NA     1104         3
:)    2   100     0           NA <NA>     NA     NA     1109         3
:)    3   100     0           NA <NA>     NA     NA     1111         3
:)    4   100     0           NA <NA>     NA     NA     1111         3
:)    5   100     0           NA <NA>     NA     NA     2502         3
:)    6   100     0           NA <NA>     NA     NA     3121         3
:)    7   100     0           NA <NA>     NA     NA     3204         3
:)    8   100     0           NA <NA>     NA     NA     3232         3
:)    9   100     0           NA <NA>     NA     NA     3304         3
:)   10   100     0           NA <NA>     NA     NA     3506         3
:)   # ℹ 431,225 more rows
regional_mdis_1 <- regional_mdis_1 %>%
  mutate_if(is.numeric, as.character) %>%
  filter(!is.na(workplace_cd)) %>%
  filter(as.character(empstatus) == "1") %>%  # 취업자만
  select(-empstatus) %>%
  select(-hierar) %>%
  filter(age > 19) %>%
  filter(age < 61) %>%  
  select(-age) %>%
  relocate(resid_cd, workplace_cd, edu, occup, ind)
regional_mdis_1
:)   # A tibble: 169,365 × 5
:)      resid_cd workplace_cd edu   occup ind  
:)      <chr>    <chr>        <chr> <chr> <chr>
:)    1 3102     3102         2     9     N    
:)    2 1106     1101         3     4     I    
:)    3 1115     1102         3     3     N    
:)    4 1103     1103         3     4     I    
:)    5 1104     1104         3     9     H    
:)    6 1106     1106         3     7     P    
:)    7 1109     1109         3     3     G    
:)    8 1110     1110         3     5     G    
:)    9 1109     1111         3     8     C    
:)   10 1110     1111         3     4     I    
:)   # ℹ 169,355 more rows
regional_mdis_1 <- regional_mdis_1 %>%
  filter(str_sub(resid_cd, 1, 2) %in% c("11", "23", "31")) %>%
  filter(str_sub(workplace_cd, 1, 2) == "11")
regional_mdis_1
:)   # A tibble: 23,653 × 5
:)      resid_cd workplace_cd edu   occup ind  
:)      <chr>    <chr>        <chr> <chr> <chr>
:)    1 1106     1101         3     4     I    
:)    2 1115     1102         3     3     N    
:)    3 1103     1103         3     4     I    
:)    4 1104     1104         3     9     H    
:)    5 1106     1106         3     7     P    
:)    6 1109     1109         3     3     G    
:)    7 1110     1110         3     5     G    
:)    8 1109     1111         3     8     C    
:)    9 1110     1111         3     4     I    
:)   10 1111     1111         3     4     S    
:)   # ℹ 23,643 more rows
#1101   서울종로구
#1102   서울중구
#1103   서울용산구
#1104   서울성동구
#1105   서울광진구
#1106   서울동대문구
#1107   서울중랑구
#1108   서울성북구
#1109   서울강북구
#1110   서울도봉구
#1111   서울노원구
#1112   서울은평구
#1113   서울서대문구
#1114   서울마포구
#1115   서울양천구
#1116   서울강서구
#1117   서울구로구
#1118   서울금천구
#1119   서울영등포구
#1120   서울동작구
#1121   서울관악구
#1122   서울서초구
#1123   서울강남구
#1124   서울송파구
#1125   서울강동구
regional_mdis_1 <- regional_mdis_1 %>%
    mutate(ind = recode(ind,
           'A' = '농어업',
           'B' = '광업',
           'C' = '제조업',
           'D' = '전기가스업',
           'E' = '수도하수업',
           'F' = '건설업',
           'G' = '도소매업',
           'H' = '운수창고업',
           'I' = '숙박음식업',
           'J' = '정보통신업',
           'K' = '금융보험업',
           'L' = '부동산업',
           'M' = '전문과학기술업',
           'N' = '사업지원업',
           'O' = '공공행정',
           'P' = '교육',
           'Q' = '보건복지',
           'R' = '예술스포츠여가',
           'S' = '협회및개인서비스',
           'T' = 'nu1',
           'U' = 'nu2')) %>%
    filter(!ind %in% c("nu1", "nu2")) %>%
    filter(!ind %in% c("건설업", "농어업", "광업", "수도하수업", "전기가스업")) %>%
    
    #mutate(occup = factor(occup)) %>%
    mutate(occup = recode(occup,
           '1' = '관리직',
           '2' = '전문직',
           '3' = '사무직',
           '4' = '서비스직',
           '5' = '판매직',
           '6' = '농어업숙련직',
           '7' = '기능직',
           '8' = '기계조작직',
           '9' = '단순노무직'))  %>%
    filter(ind != '농어업숙련직') %>%
    
    mutate(edu = as.character(edu)) %>%
    mutate(edu = recode(edu,
           '0' = '무학',
           '1' = '초졸이하',
           '2' = '중졸',
           '3' = '고졸',
           '4' = '초대졸',
           '5' = '대졸',
           '6' = '대학원석사',
           '7' = '대학원박사')) %>%
    mutate(edu_college = ifelse(edu %in% c('무학', '초졸이하', '중졸', '고졸', '초대졸'), '대졸미만', '대졸이상')) 
regional_mdis_1
:)   # A tibble: 22,156 × 6
:)      resid_cd workplace_cd edu   occup      ind              edu_college
:)      <chr>    <chr>        <chr> <chr>      <chr>            <chr>      
:)    1 1106     1101         고졸  서비스직   숙박음식업       대졸미만   
:)    2 1115     1102         고졸  사무직     사업지원업       대졸미만   
:)    3 1103     1103         고졸  서비스직   숙박음식업       대졸미만   
:)    4 1104     1104         고졸  단순노무직 운수창고업       대졸미만   
:)    5 1106     1106         고졸  기능직     교육             대졸미만   
:)    6 1109     1109         고졸  사무직     도소매업         대졸미만   
:)    7 1110     1110         고졸  판매직     도소매업         대졸미만   
:)    8 1109     1111         고졸  기계조작직 제조업           대졸미만   
:)    9 1110     1111         고졸  서비스직   숙박음식업       대졸미만   
:)   10 1111     1111         고졸  서비스직   협회및개인서비스 대졸미만   
:)   # ℹ 22,146 more rows

3.1.2 import regional_mdis_2

regional_mdis_2 <- read_csv("data_mdis_econsurvey/regional_mdis/2021_상반기_A형_시군구_대분류_2021.csv",
                            locale=locale('ko',encoding='euc-kr'))
regional_mdis_2
:)   # A tibble: 435,770 × 8
:)      행정구역시군구코드 만연령 교육정도코드 `10차산업대분류코드` 사업체소재지행정구역코드 `7차_1자리직업대분류코드` 종사상지위코드 경제활동구분코드
:)                   <dbl>  <dbl>        <dbl> <chr>                                   <dbl>                     <dbl>          <dbl>            <dbl>
:)    1               1101     15            2 <NA>                                       NA                        NA             NA                3
:)    2               1101     15            2 <NA>                                       NA                        NA             NA                3
:)    3               1101     15            2 <NA>                                       NA                        NA             NA                3
:)    4               1101     15            2 <NA>                                       NA                        NA             NA                3
:)    5               1101     15            3 <NA>                                       NA                        NA             NA                3
:)    6               1101     15            3 <NA>                                       NA                        NA             NA                3
:)    7               1101     15            3 <NA>                                       NA                        NA             NA                3
:)    8               1101     15            3 <NA>                                       NA                        NA             NA                3
:)    9               1101     15            3 <NA>                                       NA                        NA             NA                3
:)   10               1101     16            2 <NA>                                       NA                        NA             NA                3
:)   # ℹ 435,760 more rows
regional_mdis_2 %>%
  is.na() %>%
  colSums()
:)         행정구역시군구코드                   만연령             교육정도코드       10차산업대분류코드 사업체소재지행정구역코드  7차_1자리직업대분류코드           종사상지위코드         경제활동구분코드 
:)                          0                        0                        0                   180846                   180846                   180846                   180846                        0
colnames(regional_mdis_2) <- c("resid_cd", "age", "edu", "ind", "workplace_cd", "occup", "hierar", "empstatus") 
regional_mdis_2
:)   # A tibble: 435,770 × 8
:)      resid_cd   age   edu ind   workplace_cd occup hierar empstatus
:)         <dbl> <dbl> <dbl> <chr>        <dbl> <dbl>  <dbl>     <dbl>
:)    1     1101    15     2 <NA>            NA    NA     NA         3
:)    2     1101    15     2 <NA>            NA    NA     NA         3
:)    3     1101    15     2 <NA>            NA    NA     NA         3
:)    4     1101    15     2 <NA>            NA    NA     NA         3
:)    5     1101    15     3 <NA>            NA    NA     NA         3
:)    6     1101    15     3 <NA>            NA    NA     NA         3
:)    7     1101    15     3 <NA>            NA    NA     NA         3
:)    8     1101    15     3 <NA>            NA    NA     NA         3
:)    9     1101    15     3 <NA>            NA    NA     NA         3
:)   10     1101    16     2 <NA>            NA    NA     NA         3
:)   # ℹ 435,760 more rows
regional_mdis_2 <- regional_mdis_2 %>%
  mutate_if(is.numeric, as.character) %>%
  filter(!is.na(workplace_cd)) %>%
  filter(as.character(empstatus) == "1") %>%  # 취업자만
  select(-empstatus) %>%
  select(-hierar) %>%
  filter(age > 19) %>%
  filter(age < 61) %>%  
  select(-age) %>%
  relocate(resid_cd, workplace_cd, edu, occup, ind)
regional_mdis_2
:)   # A tibble: 171,773 × 5
:)      resid_cd workplace_cd edu   occup ind  
:)      <chr>    <chr>        <chr> <chr> <chr>
:)    1 1101     1101         3     4     I    
:)    2 1101     1104         3     7     I    
:)    3 1101     2602         4     5     G    
:)    4 1101     1101         4     3     R    
:)    5 1101     1103         5     4     I    
:)    6 1101     1123         3     4     M    
:)    7 1101     1101         5     4     I    
:)    8 1101     1106         5     3     K    
:)    9 1101     1111         5     2     P    
:)   10 1101     1101         5     4     R    
:)   # ℹ 171,763 more rows
regional_mdis_2 <- regional_mdis_2 %>%
  filter(str_sub(resid_cd, 1, 2) %in% c("11", "23", "31")) %>%
  filter(str_sub(workplace_cd, 1, 2) == "11")
regional_mdis_2
:)   # A tibble: 23,975 × 5
:)      resid_cd workplace_cd edu   occup ind  
:)      <chr>    <chr>        <chr> <chr> <chr>
:)    1 1101     1101         3     4     I    
:)    2 1101     1104         3     7     I    
:)    3 1101     1101         4     3     R    
:)    4 1101     1103         5     4     I    
:)    5 1101     1123         3     4     M    
:)    6 1101     1101         5     4     I    
:)    7 1101     1106         5     3     K    
:)    8 1101     1111         5     2     P    
:)    9 1101     1101         5     4     R    
:)   10 1101     1101         4     7     C    
:)   # ℹ 23,965 more rows
regional_mdis_2 <- regional_mdis_2 %>%
    mutate(ind = recode(ind,
           'A' = '농어업',
           'B' = '광업',
           'C' = '제조업',
           'D' = '전기가스업',
           'E' = '수도하수업',
           'F' = '건설업',
           'G' = '도소매업',
           'H' = '운수창고업',
           'I' = '숙박음식업',
           'J' = '정보통신업',
           'K' = '금융보험업',
           'L' = '부동산업',
           'M' = '전문과학기술업',
           'N' = '사업지원업',
           'O' = '공공행정',
           'P' = '교육',
           'Q' = '보건복지',
           'R' = '예술스포츠여가',
           'S' = '협회및개인서비스',
           'T' = 'nu1',
           'U' = 'nu2')) %>%
    filter(!ind %in% c("nu1", "nu2")) %>%
    filter(!ind %in% c("건설업", "농어업", "광업", "수도하수업", "전기가스업")) %>%
    
    #mutate(occup = factor(occup)) %>%
    mutate(occup = recode(occup,
           '1' = '관리직',
           '2' = '전문직',
           '3' = '사무직',
           '4' = '서비스직',
           '5' = '판매직',
           '6' = '농어업숙련직',
           '7' = '기능직',
           '8' = '기계조작직',
           '9' = '단순노무직'))  %>%
    filter(ind != '농어업숙련직') %>%
    
    mutate(edu = as.character(edu)) %>%
    mutate(edu = recode(edu,
           '0' = '무학',
           '1' = '초졸이하',
           '2' = '중졸',
           '3' = '고졸',
           '4' = '초대졸',
           '5' = '대졸',
           '6' = '대학원석사',
           '7' = '대학원박사')) %>%
    mutate(edu_college = ifelse(edu %in% c('무학', '초졸이하', '중졸', '고졸', '초대졸'), '대졸미만', '대졸이상')) 
regional_mdis_2
:)   # A tibble: 22,427 × 6
:)      resid_cd workplace_cd edu    occup    ind            edu_college
:)      <chr>    <chr>        <chr>  <chr>    <chr>          <chr>      
:)    1 1101     1101         고졸   서비스직 숙박음식업     대졸미만   
:)    2 1101     1104         고졸   기능직   숙박음식업     대졸미만   
:)    3 1101     1101         초대졸 사무직   예술스포츠여가 대졸미만   
:)    4 1101     1103         대졸   서비스직 숙박음식업     대졸이상   
:)    5 1101     1123         고졸   서비스직 전문과학기술업 대졸미만   
:)    6 1101     1101         대졸   서비스직 숙박음식업     대졸이상   
:)    7 1101     1106         대졸   사무직   금융보험업     대졸이상   
:)    8 1101     1111         대졸   전문직   교육           대졸이상   
:)    9 1101     1101         대졸   서비스직 예술스포츠여가 대졸이상   
:)   10 1101     1101         초대졸 기능직   제조업         대졸미만   
:)   # ℹ 22,417 more rows

3.1.3 integrate

regional_mdis <- regional_mdis_1 %>%
  bind_rows(regional_mdis_2)
regional_mdis
:)   # A tibble: 44,583 × 6
:)      resid_cd workplace_cd edu   occup      ind              edu_college
:)      <chr>    <chr>        <chr> <chr>      <chr>            <chr>      
:)    1 1106     1101         고졸  서비스직   숙박음식업       대졸미만   
:)    2 1115     1102         고졸  사무직     사업지원업       대졸미만   
:)    3 1103     1103         고졸  서비스직   숙박음식업       대졸미만   
:)    4 1104     1104         고졸  단순노무직 운수창고업       대졸미만   
:)    5 1106     1106         고졸  기능직     교육             대졸미만   
:)    6 1109     1109         고졸  사무직     도소매업         대졸미만   
:)    7 1110     1110         고졸  판매직     도소매업         대졸미만   
:)    8 1109     1111         고졸  기계조작직 제조업           대졸미만   
:)    9 1110     1111         고졸  서비스직   숙박음식업       대졸미만   
:)   10 1111     1111         고졸  서비스직   협회및개인서비스 대졸미만   
:)   # ℹ 44,573 more rows
regional_mdis <- regional_mdis %>%
  group_by(occup, ind, edu_college) %>%
  summarise(count = n())
regional_mdis
:)   # A tibble: 220 × 4
:)   # Groups:   occup, ind [113]
:)      occup  ind        edu_college count
:)      <chr>  <chr>      <chr>       <int>
:)    1 관리직 공공행정   대졸이상       19
:)    2 관리직 교육       대졸미만        1
:)    3 관리직 교육       대졸이상       29
:)    4 관리직 금융보험업 대졸미만        7
:)    5 관리직 금융보험업 대졸이상      101
:)    6 관리직 도소매업   대졸미만        6
:)    7 관리직 도소매업   대졸이상       79
:)    8 관리직 보건복지   대졸이상       12
:)    9 관리직 부동산업   대졸미만        1
:)   10 관리직 부동산업   대졸이상       12
:)   # ℹ 210 more rows

3.1.4 edu level by industry

edu_by_industry <- regional_mdis %>%
    ungroup() %>%
    group_by(ind, edu_college) %>%
    summarise(count = sum(count))

edu_by_industry <- edu_by_industry %>%
    pivot_wider(names_from = edu_college, values_from = count) %>%
    ungroup() 

colnames(edu_by_industry) <- c("ind", "below", "above")
edu_by_industry
:)   # A tibble: 14 × 3
:)      ind              below above
:)      <chr>            <int> <int>
:)    1 공공행정           466  1153
:)    2 교육               587  3255
:)    3 금융보험업         841  2104
:)    4 도소매업          3758  3910
:)    5 보건복지          1705  2116
:)    6 부동산업           439   530
:)    7 사업지원업        1384   958
:)    8 숙박음식업        2181  1194
:)    9 예술스포츠여가     324   648
:)   10 운수창고업        1200   857
:)   11 전문과학기술업    1017  4029
:)   12 정보통신업        1083  3702
:)   13 제조업            1869  1529
:)   14 협회및개인서비스  1006   738
edu_by_industry <- edu_by_industry %>%
    mutate(college_and_above = above / (below + above)) %>%
    select(ind, college_and_above)

edu_by_industry %>%
  arrange(desc(college_and_above))
:)   # A tibble: 14 × 2
:)      ind              college_and_above
:)      <chr>                        <dbl>
:)    1 교육                         0.847
:)    2 전문과학기술업               0.798
:)    3 정보통신업                   0.774
:)    4 금융보험업                   0.714
:)    5 공공행정                     0.712
:)    6 예술스포츠여가               0.667
:)    7 보건복지                     0.554
:)    8 부동산업                     0.547
:)    9 도소매업                     0.510
:)   10 제조업                       0.450
:)   11 협회및개인서비스             0.423
:)   12 운수창고업                   0.417
:)   13 사업지원업                   0.409
:)   14 숙박음식업                   0.354

3.1.5 edu level by occupation

edu_by_occupation <- regional_mdis %>%
    ungroup() %>%
    group_by(occup, edu_college) %>%
    summarise(count = sum(count)) 

edu_by_occupation <- edu_by_occupation %>%
    pivot_wider(names_from = edu_college, values_from = count) %>%
    ungroup()

colnames(edu_by_occupation) <- c("occup", "below", "above")
edu_by_occupation
:)   # A tibble: 9 × 3
:)     occup        below above
:)     <chr>        <int> <int>
:)   1 관리직          50   489
:)   2 기계조작직    1295   351
:)   3 기능직        1531   376
:)   4 농어업숙련직     7     6
:)   5 단순노무직    2147   572
:)   6 사무직        3759  8321
:)   7 서비스직      2918  1655
:)   8 전문직        3012 12710
:)   9 판매직        3141  2243
edu_by_occupation <- edu_by_occupation %>%
    mutate(college_and_above = above / (below + above)) %>%
    select(occup, college_and_above)

edu_by_occupation %>%
  arrange(college_and_above)
:)   # A tibble: 9 × 2
:)     occup        college_and_above
:)     <chr>                    <dbl>
:)   1 기능직                   0.197
:)   2 단순노무직               0.210
:)   3 기계조작직               0.213
:)   4 서비스직                 0.362
:)   5 판매직                   0.417
:)   6 농어업숙련직             0.462
:)   7 사무직                   0.689
:)   8 전문직                   0.808
:)   9 관리직                   0.907
#관리자    Managers
#전문가 및 관련 종사자   Professionals and Related Workers
#사무 종사자 Clerks
#서비스 종사자    Service Workers
#판매 종사자 Sales Workers
#농림·어업 숙련 종사자   Skilled Agricultural, Forestry and Fishery Workers
#기능원 및 관련 기능 종사자    Craft and Related Trades Workers
#장치·기계 조작 및 조립 종사자  Equipment, Machine Operating and Assembling Workers
#단순노무 종사자   Elementary Workers
edu_by_occupation <- edu_by_occupation %>%
    mutate(occup = recode(occup,
           '관리직' = 'Managerial',
           '전문직' = 'Professional',
           '사무직' = 'Clerical',
           '서비스직' = 'Service',
           '판매직' = 'Sales',
           '농어업숙련직' = 'Agricultural\nand fishery',
           '기능직' = 'Craft',
           '기계조작직' = 'Machine\n operating',
           '단순노무직' = 'Elemetrary'))  
edu_by_occupation %>%
    arrange(college_and_above) 
:)   # A tibble: 9 × 2
:)     occup                       college_and_above
:)     <chr>                                   <dbl>
:)   1 "Craft"                                 0.197
:)   2 "Elemetrary"                            0.210
:)   3 "Machine\n operating"                   0.213
:)   4 "Service"                               0.362
:)   5 "Sales"                                 0.417
:)   6 "Agricultural\nand fishery"             0.462
:)   7 "Clerical"                              0.689
:)   8 "Professional"                          0.808
:)   9 "Managerial"                            0.907

3.1.6 edu level by occupation (correlation)

regional_mdis <- regional_mdis_1 %>%
  bind_rows(regional_mdis_2)
regional_mdis
:)   # A tibble: 44,583 × 6
:)      resid_cd workplace_cd edu   occup      ind              edu_college
:)      <chr>    <chr>        <chr> <chr>      <chr>            <chr>      
:)    1 1106     1101         고졸  서비스직   숙박음식업       대졸미만   
:)    2 1115     1102         고졸  사무직     사업지원업       대졸미만   
:)    3 1103     1103         고졸  서비스직   숙박음식업       대졸미만   
:)    4 1104     1104         고졸  단순노무직 운수창고업       대졸미만   
:)    5 1106     1106         고졸  기능직     교육             대졸미만   
:)    6 1109     1109         고졸  사무직     도소매업         대졸미만   
:)    7 1110     1110         고졸  판매직     도소매업         대졸미만   
:)    8 1109     1111         고졸  기계조작직 제조업           대졸미만   
:)    9 1110     1111         고졸  서비스직   숙박음식업       대졸미만   
:)   10 1111     1111         고졸  서비스직   협회및개인서비스 대졸미만   
:)   # ℹ 44,573 more rows
regional_mdis <- regional_mdis %>%
  group_by(resid_cd, edu_college, occup, ind) %>%
  summarise(count = n())
regional_mdis
:)   # A tibble: 5,430 × 5
:)   # Groups:   resid_cd, edu_college, occup [913]
:)      resid_cd edu_college occup      ind              count
:)      <chr>    <chr>       <chr>      <chr>            <int>
:)    1 1101     대졸미만    기계조작직 부동산업             1
:)    2 1101     대졸미만    기계조작직 숙박음식업           2
:)    3 1101     대졸미만    기계조작직 운수창고업           5
:)    4 1101     대졸미만    기계조작직 제조업               6
:)    5 1101     대졸미만    기능직     도소매업             3
:)    6 1101     대졸미만    기능직     사업지원업           3
:)    7 1101     대졸미만    기능직     숙박음식업           2
:)    8 1101     대졸미만    기능직     제조업              54
:)    9 1101     대졸미만    기능직     협회및개인서비스     5
:)   10 1101     대졸미만    단순노무직 공공행정             7
:)   # ℹ 5,420 more rows
region_specific_edu <- regional_mdis %>%
  ungroup() %>%
  group_by(resid_cd, edu_college) %>%
  summarise(count = n()) %>%
  pivot_wider(names_from = edu_college, values_from = count) %>%
  ungroup()
region_specific_edu
:)   # A tibble: 66 × 3
:)      resid_cd 대졸미만 대졸이상
:)      <chr>       <int>    <int>
:)    1 1101           52       61
:)    2 1102           53       52
:)    3 1103           58       64
:)    4 1104           56       55
:)    5 1105           62       63
:)    6 1106           63       59
:)    7 1107           68       57
:)    8 1108           65       59
:)    9 1109           62       58
:)   10 1110           60       56
:)   # ℹ 56 more rows
colnames(region_specific_edu) <- c("resid_cd", "below", "above")
region_specific_edu
:)   # A tibble: 66 × 3
:)      resid_cd below above
:)      <chr>    <int> <int>
:)    1 1101        52    61
:)    2 1102        53    52
:)    3 1103        58    64
:)    4 1104        56    55
:)    5 1105        62    63
:)    6 1106        63    59
:)    7 1107        68    57
:)    8 1108        65    59
:)    9 1109        62    58
:)   10 1110        60    56
:)   # ℹ 56 more rows
region_specific_edu <- region_specific_edu %>%
    mutate(college_and_above = above / (below + above)) %>%
    select(resid_cd, college_and_above)
region_specific_edu
:)   # A tibble: 66 × 2
:)      resid_cd college_and_above
:)      <chr>                <dbl>
:)    1 1101                 0.540
:)    2 1102                 0.495
:)    3 1103                 0.525
:)    4 1104                 0.495
:)    5 1105                 0.504
:)    6 1106                 0.484
:)    7 1107                 0.456
:)    8 1108                 0.476
:)    9 1109                 0.483
:)   10 1110                 0.483
:)   # ℹ 56 more rows
region_specific_occup <- regional_mdis %>%
  ungroup() %>%
  group_by(resid_cd, occup) %>%
  summarise(count = n()) %>%
  mutate(proportion = count / sum(count)) %>%
  select(-count)
region_specific_occup
:)   # A tibble: 496 × 3
:)   # Groups:   resid_cd [66]
:)      resid_cd occup      proportion
:)      <chr>    <chr>           <dbl>
:)    1 1101     관리직         0.0354
:)    2 1101     기계조작직     0.0796
:)    3 1101     기능직         0.0796
:)    4 1101     단순노무직     0.133 
:)    5 1101     사무직         0.239 
:)    6 1101     서비스직       0.142 
:)    7 1101     전문직         0.195 
:)    8 1101     판매직         0.0973
:)    9 1102     관리직         0.0381
:)   10 1102     기계조작직     0.0857
:)   # ℹ 486 more rows

3.1.6.1 visualize

region_specific_edu
:)   # A tibble: 66 × 2
:)      resid_cd college_and_above
:)      <chr>                <dbl>
:)    1 1101                 0.540
:)    2 1102                 0.495
:)    3 1103                 0.525
:)    4 1104                 0.495
:)    5 1105                 0.504
:)    6 1106                 0.484
:)    7 1107                 0.456
:)    8 1108                 0.476
:)    9 1109                 0.483
:)   10 1110                 0.483
:)   # ℹ 56 more rows
region_specific_occup
:)   # A tibble: 496 × 3
:)   # Groups:   resid_cd [66]
:)      resid_cd occup      proportion
:)      <chr>    <chr>           <dbl>
:)    1 1101     관리직         0.0354
:)    2 1101     기계조작직     0.0796
:)    3 1101     기능직         0.0796
:)    4 1101     단순노무직     0.133 
:)    5 1101     사무직         0.239 
:)    6 1101     서비스직       0.142 
:)    7 1101     전문직         0.195 
:)    8 1101     판매직         0.0973
:)    9 1102     관리직         0.0381
:)   10 1102     기계조작직     0.0857
:)   # ℹ 486 more rows
region_specific <- region_specific_occup %>%
  left_join(region_specific_edu, by = "resid_cd")
region_specific
:)   # A tibble: 496 × 4
:)   # Groups:   resid_cd [66]
:)      resid_cd occup      proportion college_and_above
:)      <chr>    <chr>           <dbl>             <dbl>
:)    1 1101     관리직         0.0354             0.540
:)    2 1101     기계조작직     0.0796             0.540
:)    3 1101     기능직         0.0796             0.540
:)    4 1101     단순노무직     0.133              0.540
:)    5 1101     사무직         0.239              0.540
:)    6 1101     서비스직       0.142              0.540
:)    7 1101     전문직         0.195              0.540
:)    8 1101     판매직         0.0973             0.540
:)    9 1102     관리직         0.0381             0.495
:)   10 1102     기계조작직     0.0857             0.495
:)   # ℹ 486 more rows

3.2 WFH during COVID by industry and edu level

3.2.1 2019

wfh_status_19 <- read_csv("data_mdis_econsurvey/8월_근로형태별_20221016_99633_데이터/2019_8월_근로형태별_20221016_99633.csv", locale=locale('ko',encoding='euc-kr'))

colnames(wfh_status_19)
:)    [1] "동부읍면부코드"                    "교육정도_학력코드"                 "교육정도_계열코드"                 "현재일관련사항_10차산업대분류코드" "현재일관련사항_종사자규모코드"     "현재일관련사항_7차직업대분류코드"  "현재일관련사항_종사상지위코드"     "교육정도컨버젼코드"                "만연령"                            "근로장소코드"                      "유연근무제_활용여부"               "유연근무제_활용형태1코드"          "유연근무제_활용형태2코드"         
:)   [14] "유연근무제_향후활용예정형태1코드"  "유연근무제_향후활용예정형태2코드"  "연령계층코드"                      "가중값"                            "조사연월"                          "가구구분코드"
wfh_status_19 <- wfh_status_19 %>%
    select(c(1,
             4, 6,
             8, 9,
             11, 12))
wfh_status_19
:)   # A tibble: 60,159 × 7
:)      동부읍면부코드 현재일관련사항_10차산업대분류코드 현재일관련사항_7차직업대분류코드 교육정도컨버젼코드 만연령 유연근무제_활용여부 유연근무제_활용형태1코드
:)               <dbl> <chr>                                                        <dbl>              <dbl> <chr>                <dbl>                    <dbl>
:)    1              1 0                                                                0                  1 030                      0                        0
:)    2              1 0                                                                0                  1 033                      0                        0
:)    3              1 0                                                                0                  1 036                      0                        0
:)    4              1 0                                                                0                  1 038                      0                        0
:)    5              1 0                                                                0                  1 048                      0                        0
:)    6              1 0                                                                0                  1 049                      0                        0
:)    7              1 0                                                                0                  1 050                      0                        0
:)    8              1 0                                                                0                  1 051                      0                        0
:)    9              1 0                                                                0                  1 052                      0                        0
:)   10              1 0                                                                0                  1 052                      0                        0
:)   # ℹ 60,149 more rows
colnames(wfh_status_19) <- c("dong_ep_myeon", "ind", "occup", "edu", "age", "flex_work_yesno", "flex_work_type")

wfh_status_19 <- wfh_status_19 %>%
    mutate(ind = as.character(ind),
           occup = as.character(occup),
           edu = as.character(edu)) %>%
    filter(ind != '0') %>%
    filter(occup != '0') %>%
    filter(edu != '0') %>%
    filter(dong_ep_myeon == '1') %>%
    mutate(age = as.numeric(age)) %>%
    filter(age >= 20 & age < 61)

wfh_status_19 <- wfh_status_19 %>%
    mutate(ind = recode(ind,
           'A' = '농어업',
           'B' = '광업',
           'C' = '제조업',
           'D' = '전기가스업',
           'E' = '수도하수업',
           'F' = '건설업',
           'G' = '도소매업',
           'H' = '운수창고업',
           'I' = '숙박음식업',
           'J' = '정보통신업',
           'K' = '금융보험업',
           'L' = '부동산업',
           'M' = '전문과학기술업',
           'N' = '사업지원업',
           'O' = '공공행정',
           'P' = '교육',
           'Q' = '보건복지',
           'R' = '예술스포츠여가',
           'S' = '협회및개인서비스',
           'T' = 'nu1',
           'U' = 'nu2')) %>%
    filter(!ind %in% c("nu1", "nu2")) %>%
    filter(!ind %in% c("농어업", "광업", "수도하수업", "전기가스업")) %>%
    
    mutate(occup = as.character(occup)) %>%
    mutate(occup = recode(occup,
           '1' = '관리직',
           '2' = '전문직',
           '3' = '사무직',
           '4' = '서비스직',
           '5' = '판매직',
           '6' = '농어업숙련직',
           '7' = '기능직',
           '8' = '기계조작직',
           '9' = '단순노무직')) %>%
    
    mutate(edu = as.character(edu)) %>%
    mutate(edu = recode(edu,
           '1' = '초졸이하',
           '2' = '중졸',
           '3' = '고졸',
           '4' = '초대졸',
           '5' = '대졸',
           '6' = '대학원졸')) %>%
    mutate(edu_college = ifelse(edu %in% c('초졸이하', '중졸', '고졸', '초대졸'), '대졸미만', '대졸이상'))

wfh_status_19 <- wfh_status_19 %>%
    mutate(flex_work_yesno = as.character(flex_work_yesno)) %>%
    mutate(flex_work_yesno = recode(flex_work_yesno,
           '0' = '없음',
           '1' = '예',
           '2' = '아니오')) %>%
    mutate(flex_work_type = recode(flex_work_type,
           '0' = '없음',
           '1' = '근로시간단축근무제',
           '2' = '시차출퇴근제',
           '3' = '선택적근무시간제',
           '4' = '재택및원격근무제',
           '5' = '탄력적근무제',
           '6' = '기타유형')) %>%
    mutate(flex_work_type_isWFH = if_else(flex_work_type == '재택및원격근무제', 1, 0)) %>%
    group_by(ind, edu_college) %>%
    summarise(count = n(),
              WFH = sum(flex_work_type_isWFH)) %>%
    mutate(WFH_ratio_19 = WFH / count * 100)
wfh_status_19
:)   # A tibble: 30 × 5
:)   # Groups:   ind [15]
:)      ind        edu_college count   WFH WFH_ratio_19
:)      <chr>      <chr>       <int> <dbl>        <dbl>
:)    1 건설업     대졸미만     1271     0        0    
:)    2 건설업     대졸이상      435     0        0    
:)    3 공공행정   대졸미만      297     0        0    
:)    4 공공행정   대졸이상      659     2        0.303
:)    5 교육       대졸미만      406     1        0.246
:)    6 교육       대졸이상     1443     5        0.347
:)    7 금융보험업 대졸미만      333     1        0.300
:)    8 금융보험업 대졸이상      438     3        0.685
:)    9 도소매업   대졸미만     2040     4        0.196
:)   10 도소매업   대졸이상      979     5        0.511
:)   # ℹ 20 more rows

3.2.2 2020

wfh_status_20 <- read_csv("data_mdis_econsurvey/8월_근로형태별_20221016_99633_데이터/2020_8월_근로형태별_20221016_99633.csv", locale=locale('ko',encoding='euc-kr'))

colnames(wfh_status_20)
:)    [1] "동부읍면부코드"                    "교육정도_학력코드"                 "교육정도_계열코드"                 "현재일관련사항_10차산업대분류코드" "현재일관련사항_종사자규모코드"     "현재일관련사항_7차직업대분류코드"  "현재일관련사항_종사상지위코드"     "교육정도컨버젼코드"                "만연령"                            "근로장소코드"                      "유연근무제_활용여부"               "유연근무제_활용형태1코드"          "유연근무제_활용형태2코드"         
:)   [14] "유연근무제_향후활용예정형태1코드"  "유연근무제_향후활용예정형태2코드"  "연령계층코드"                      "가중값"                            "조사연월"                          "가구구분코드"
wfh_status_20 <- wfh_status_20 %>%
    select(c(1,
             4, 6,
             8, 9,
             11, 12))
wfh_status_20
:)   # A tibble: 59,546 × 7
:)      동부읍면부코드 현재일관련사항_10차산업대분류코드 현재일관련사항_7차직업대분류코드 교육정도컨버젼코드 만연령 유연근무제_활용여부 유연근무제_활용형태1코드
:)               <dbl> <chr>                                                        <dbl>              <dbl> <chr>                <dbl>                    <dbl>
:)    1              1 0                                                                0                  1 028                      0                        0
:)    2              1 0                                                                0                  1 031                      0                        0
:)    3              1 0                                                                0                  1 038                      0                        0
:)    4              1 0                                                                0                  1 039                      0                        0
:)    5              1 0                                                                0                  1 048                      0                        0
:)    6              1 0                                                                0                  1 049                      0                        0
:)    7              1 0                                                                0                  1 049                      0                        0
:)    8              1 0                                                                0                  1 051                      0                        0
:)    9              1 0                                                                0                  1 051                      0                        0
:)   10              1 0                                                                0                  1 052                      0                        0
:)   # ℹ 59,536 more rows
colnames(wfh_status_20) <- c("dong_ep_myeon", "ind", "occup", "edu", "age", "flex_work_yesno", "flex_work_type")

wfh_status_20 <- wfh_status_20 %>%
    mutate(ind = as.character(ind),
           occup = as.character(occup),
           edu = as.character(edu)) %>%
    filter(ind != '0') %>%
    filter(occup != '0') %>%
    filter(edu != '0') %>%
    filter(dong_ep_myeon == '1') %>%
    mutate(age = as.numeric(age)) %>%
    filter(age >= 20 & age < 61)

wfh_status_20 <- wfh_status_20 %>%
    mutate(ind = recode(ind,
           'A' = '농어업',
           'B' = '광업',
           'C' = '제조업',
           'D' = '전기가스업',
           'E' = '수도하수업',
           'F' = '건설업',
           'G' = '도소매업',
           'H' = '운수창고업',
           'I' = '숙박음식업',
           'J' = '정보통신업',
           'K' = '금융보험업',
           'L' = '부동산업',
           'M' = '전문과학기술업',
           'N' = '사업지원업',
           'O' = '공공행정',
           'P' = '교육',
           'Q' = '보건복지',
           'R' = '예술스포츠여가',
           'S' = '협회및개인서비스',
           'T' = 'nu1',
           'U' = 'nu2')) %>%
    filter(!ind %in% c("nu1", "nu2")) %>%
    filter(!ind %in% c("농어업", "광업", "수도하수업", "전기가스업")) %>%
    
    mutate(occup = as.character(occup)) %>%
    mutate(occup = recode(occup,
           '1' = '관리직',
           '2' = '전문직',
           '3' = '사무직',
           '4' = '서비스직',
           '5' = '판매직',
           '6' = '농어업숙련직',
           '7' = '기능직',
           '8' = '기계조작직',
           '9' = '단순노무직')) %>%
    
    mutate(edu = as.character(edu)) %>%
    mutate(edu = recode(edu,
           '1' = '초졸이하',
           '2' = '중졸',
           '3' = '고졸',
           '4' = '초대졸',
           '5' = '대졸',
           '6' = '대학원졸')) %>%
    mutate(edu_college = ifelse(edu %in% c('초졸이하', '중졸', '고졸', '초대졸'), '대졸미만', '대졸이상'))

wfh_status_20 <- wfh_status_20 %>%
    mutate(flex_work_yesno = as.character(flex_work_yesno)) %>%
    mutate(flex_work_yesno = recode(flex_work_yesno,
           '0' = '없음',
           '1' = '예',
           '2' = '아니오')) %>%
    mutate(flex_work_type = recode(flex_work_type,
           '0' = '없음',
           '1' = '근로시간단축근무제',
           '2' = '시차출퇴근제',
           '3' = '선택적근무시간제',
           '4' = '재택및원격근무제',
           '5' = '탄력적근무제',
           '6' = '기타유형')) %>%
    mutate(flex_work_type_isWFH = if_else(flex_work_type == '재택및원격근무제', 1, 0)) %>%
    group_by(ind, edu_college) %>%
    summarise(count = n(),
              WFH = sum(flex_work_type_isWFH)) %>%
    mutate(WFH_ratio_20 = WFH / count * 100)
wfh_status_20
:)   # A tibble: 30 × 5
:)   # Groups:   ind [15]
:)      ind        edu_college count   WFH WFH_ratio_20
:)      <chr>      <chr>       <int> <dbl>        <dbl>
:)    1 건설업     대졸미만     1325     3        0.226
:)    2 건설업     대졸이상      425     2        0.471
:)    3 공공행정   대졸미만      317     3        0.946
:)    4 공공행정   대졸이상      683    26        3.81 
:)    5 교육       대졸미만      401     3        0.748
:)    6 교육       대졸이상     1393    76        5.46 
:)    7 금융보험업 대졸미만      305     9        2.95 
:)    8 금융보험업 대졸이상      421    11        2.61 
:)    9 도소매업   대졸미만     1940     6        0.309
:)   10 도소매업   대졸이상      935    19        2.03 
:)   # ℹ 20 more rows

3.2.3 2021

wfh_status_21 <- read_csv("data_mdis_econsurvey/8월_근로형태별_20221016_99633_데이터/2021_8월_근로형태별_20221016_99633.csv", locale=locale('ko',encoding='euc-kr'))

colnames(wfh_status_21)
:)    [1] "동부읍면부코드"                    "교육정도_학력코드"                 "교육정도_계열코드"                 "현재일관련사항_10차산업대분류코드" "현재일관련사항_종사자규모코드"     "현재일관련사항_7차직업대분류코드"  "현재일관련사항_종사상지위코드"     "교육정도컨버젼코드"                "만연령"                            "근로장소코드"                      "유연근무제_활용여부"               "유연근무제_활용형태1코드"          "유연근무제_활용형태2코드"         
:)   [14] "유연근무제_향후활용예정형태1코드"  "유연근무제_향후활용예정형태2코드"  "연령계층코드"                      "가중값"                            "조사연월"                          "가구구분코드"
wfh_status_21 <- wfh_status_21 %>%
    select(c(1,
             4, 6,
             8, 9,
             11, 12))
wfh_status_21
:)   # A tibble: 58,882 × 7
:)      동부읍면부코드 현재일관련사항_10차산업대분류코드 현재일관련사항_7차직업대분류코드 교육정도컨버젼코드 만연령 유연근무제_활용여부 유연근무제_활용형태1코드
:)               <dbl> <chr>                                                        <dbl>              <dbl> <chr>                <dbl>                    <dbl>
:)    1              1 0                                                                0                  1 031                      0                        0
:)    2              1 0                                                                0                  1 032                      0                        0
:)    3              1 0                                                                0                  1 035                      0                        0
:)    4              1 0                                                                0                  1 039                      0                        0
:)    5              1 0                                                                0                  1 039                      0                        0
:)    6              1 0                                                                0                  1 040                      0                        0
:)    7              1 0                                                                0                  1 040                      0                        0
:)    8              1 0                                                                0                  1 047                      0                        0
:)    9              1 0                                                                0                  1 050                      0                        0
:)   10              1 0                                                                0                  1 052                      0                        0
:)   # ℹ 58,872 more rows
colnames(wfh_status_21) <- c("dong_ep_myeon", "ind", "occup", "edu", "age", "flex_work_yesno", "flex_work_type")

wfh_status_21 <- wfh_status_21 %>%
    mutate(ind = as.character(ind),
           occup = as.character(occup),
           edu = as.character(edu)) %>%
    filter(ind != '0') %>%
    filter(occup != '0') %>%
    filter(edu != '0') %>%
    filter(dong_ep_myeon == '1') %>%
    mutate(age = as.numeric(age)) %>%
    filter(age >= 20 & age < 61)

wfh_status_21 <- wfh_status_21 %>%
    mutate(ind = recode(ind,
           'A' = '농어업',
           'B' = '광업',
           'C' = '제조업',
           'D' = '전기가스업',
           'E' = '수도하수업',
           'F' = '건설업',
           'G' = '도소매업',
           'H' = '운수창고업',
           'I' = '숙박음식업',
           'J' = '정보통신업',
           'K' = '금융보험업',
           'L' = '부동산업',
           'M' = '전문과학기술업',
           'N' = '사업지원업',
           'O' = '공공행정',
           'P' = '교육',
           'Q' = '보건복지',
           'R' = '예술스포츠여가',
           'S' = '협회및개인서비스',
           'T' = 'nu1',
           'U' = 'nu2')) %>%
    filter(!ind %in% c("nu1", "nu2")) %>%
    filter(!ind %in% c("농어업", "광업", "수도하수업", "전기가스업")) %>%
    
    mutate(occup = as.character(occup)) %>%
    mutate(occup = recode(occup,
           '1' = '관리직',
           '2' = '전문직',
           '3' = '사무직',
           '4' = '서비스직',
           '5' = '판매직',
           '6' = '농어업숙련직',
           '7' = '기능직',
           '8' = '기계조작직',
           '9' = '단순노무직')) %>%
    
    mutate(edu = as.character(edu)) %>%
    mutate(edu = recode(edu,
           '1' = '초졸이하',
           '2' = '중졸',
           '3' = '고졸',
           '4' = '초대졸',
           '5' = '대졸',
           '6' = '대학원졸')) %>%
    mutate(edu_college = ifelse(edu %in% c('초졸이하', '중졸', '고졸', '초대졸'), '대졸미만', '대졸이상'))

wfh_status_21 <- wfh_status_21 %>%
    mutate(flex_work_yesno = as.character(flex_work_yesno)) %>%
    mutate(flex_work_yesno = recode(flex_work_yesno,
           '0' = '없음',
           '1' = '예',
           '2' = '아니오')) %>%
    mutate(flex_work_type = recode(flex_work_type,
           '0' = '없음',
           '1' = '근로시간단축근무제',
           '2' = '시차출퇴근제',
           '3' = '선택적근무시간제',
           '4' = '재택및원격근무제',
           '5' = '탄력적근무제',
           '6' = '기타유형')) %>%
    mutate(flex_work_type_isWFH = if_else(flex_work_type == '재택및원격근무제', 1, 0)) %>%
    group_by(ind, edu_college) %>%
    summarise(count = n(),
              WFH = sum(flex_work_type_isWFH)) %>%
    mutate(WFH_ratio_21 = WFH / count * 100)
wfh_status_21
:)   # A tibble: 30 × 5
:)   # Groups:   ind [15]
:)      ind        edu_college count   WFH WFH_ratio_21
:)      <chr>      <chr>       <int> <dbl>        <dbl>
:)    1 건설업     대졸미만     1339     3        0.224
:)    2 건설업     대졸이상      426    11        2.58 
:)    3 공공행정   대졸미만      307    11        3.58 
:)    4 공공행정   대졸이상      713    47        6.59 
:)    5 교육       대졸미만      420     6        1.43 
:)    6 교육       대졸이상     1434   111        7.74 
:)    7 금융보험업 대졸미만      315    22        6.98 
:)    8 금융보험업 대졸이상      384    45       11.7  
:)    9 도소매업   대졸미만     1847     6        0.325
:)   10 도소매업   대졸이상      906    31        3.42 
:)   # ℹ 20 more rows

3.2.4 stats

3.2.4.1 제조업

wfh_status_21 %>%
  ungroup() %>%
  filter(ind == "제조업") 
:)   # A tibble: 2 × 5
:)     ind    edu_college count   WFH WFH_ratio_21
:)     <chr>  <chr>       <int> <dbl>        <dbl>
:)   1 제조업 대졸미만     2437    16        0.657
:)   2 제조업 대졸이상     1176    45        3.83
wfh_status_21 %>%
  ungroup() %>%
  filter(ind == "제조업") %>%
  ungroup() %>%
  group_by(ind) %>%
  summarise(count = sum(count),
            WFH = sum(WFH)) %>%
  mutate(WFH / count * 100)
:)   # A tibble: 1 × 4
:)     ind    count   WFH `WFH/count * 100`
:)     <chr>  <int> <dbl>             <dbl>
:)   1 제조업  3613    61              1.69

3.2.4.2 금융보험업

wfh_status_21 %>%
  ungroup() %>%
  filter(ind == "금융보험업")
:)   # A tibble: 2 × 5
:)     ind        edu_college count   WFH WFH_ratio_21
:)     <chr>      <chr>       <int> <dbl>        <dbl>
:)   1 금융보험업 대졸미만      315    22         6.98
:)   2 금융보험업 대졸이상      384    45        11.7
wfh_status_21 %>%
  ungroup() %>%
  filter(ind == "금융보험업") %>%
  ungroup() %>%
  group_by(ind) %>%
  summarise(count = sum(count),
            WFH = sum(WFH)) %>%
  mutate(WFH / count * 100)
:)   # A tibble: 1 × 4
:)     ind        count   WFH `WFH/count * 100`
:)     <chr>      <int> <dbl>             <dbl>
:)   1 금융보험업   699    67              9.59

3.2.4.3 도소매업

wfh_status_21 %>%
  ungroup() %>%
  filter(ind == "도소매업")
:)   # A tibble: 2 × 5
:)     ind      edu_college count   WFH WFH_ratio_21
:)     <chr>    <chr>       <int> <dbl>        <dbl>
:)   1 도소매업 대졸미만     1847     6        0.325
:)   2 도소매업 대졸이상      906    31        3.42
wfh_status_21 %>%
  ungroup() %>%
  filter(ind == "도소매업") %>%
  ungroup() %>%
  group_by(ind) %>%
  summarise(count = sum(count),
            WFH = sum(WFH)) %>%
  mutate(WFH / count * 100)
:)   # A tibble: 1 × 4
:)     ind      count   WFH `WFH/count * 100`
:)     <chr>    <int> <dbl>             <dbl>
:)   1 도소매업  2753    37              1.34

3.2.4.4 숙박음식업

wfh_status_21 %>%
  ungroup() %>%
  filter(ind == "숙박음식업")
:)   # A tibble: 2 × 5
:)     ind        edu_college count   WFH WFH_ratio_21
:)     <chr>      <chr>       <int> <dbl>        <dbl>
:)   1 숙박음식업 대졸미만     1328     0        0    
:)   2 숙박음식업 대졸이상      367     2        0.545
wfh_status_21 %>%
  ungroup() %>%
  filter(ind == "숙박음식업") %>%
  ungroup() %>%
  group_by(ind) %>%
  summarise(count = sum(count),
            WFH = sum(WFH)) %>%
  mutate(WFH / count * 100)
:)   # A tibble: 1 × 4
:)     ind        count   WFH `WFH/count * 100`
:)     <chr>      <int> <dbl>             <dbl>
:)   1 숙박음식업  1695     2             0.118

3.2.4.5 한번에

wfh_status_21 %>%
  ungroup() %>%
  group_by(ind) %>%
  summarise(count = sum(count),
            WFH = sum(WFH)) %>%
  mutate(WFH / count * 100) %>% 
  arrange(`WFH/count * 100`)
:)   # A tibble: 15 × 4
:)      ind              count   WFH `WFH/count * 100`
:)      <chr>            <int> <dbl>             <dbl>
:)    1 숙박음식업        1695     2             0.118
:)    2 협회및개인서비스   925     6             0.649
:)    3 보건복지          1997    13             0.651
:)    4 건설업            1765    14             0.793
:)    5 예술스포츠여가     381     5             1.31 
:)    6 도소매업          2753    37             1.34 
:)    7 부동산업           365     6             1.64 
:)    8 제조업            3613    61             1.69 
:)    9 운수창고업        1127    20             1.77 
:)   10 사업지원업         966    25             2.59 
:)   11 공공행정          1020    58             5.69 
:)   12 교육              1854   117             6.31 
:)   13 전문과학기술업    1011    67             6.63 
:)   14 금융보험업         699    67             9.59 
:)   15 정보통신업         748    84            11.2

3.2.5 sum up

wfh_status_19202122 <- wfh_status_19 %>%
    left_join(wfh_status_20, by = c('ind', 'edu_college')) %>%
    left_join(wfh_status_21, by = c('ind', 'edu_college')) %>%
    select(ind, edu_college, WFH_ratio_19, WFH_ratio_20, WFH_ratio_21) %>%
    rename(r2019 = WFH_ratio_19,
           r2020 = WFH_ratio_20,
           r2021 = WFH_ratio_21) %>%
    pivot_longer(3:5, names_to = 'year', values_to = 'WFH_ratio') %>%
    ungroup()
wfh_status_19202122
:)   # A tibble: 90 × 4
:)      ind      edu_college year  WFH_ratio
:)      <chr>    <chr>       <chr>     <dbl>
:)    1 건설업   대졸미만    r2019     0    
:)    2 건설업   대졸미만    r2020     0.226
:)    3 건설업   대졸미만    r2021     0.224
:)    4 건설업   대졸이상    r2019     0    
:)    5 건설업   대졸이상    r2020     0.471
:)    6 건설업   대졸이상    r2021     2.58 
:)    7 공공행정 대졸미만    r2019     0    
:)    8 공공행정 대졸미만    r2020     0.946
:)    9 공공행정 대졸미만    r2021     3.58 
:)   10 공공행정 대졸이상    r2019     0.303
:)   # ℹ 80 more rows
wfh_status_19202122 <- wfh_status_19202122 %>%
  mutate(year = recode(year,
                       'r2019' = '2019',
                       'r2020' = '2020',
                       'r2021' = '2021'
                       ))
wfh_status_19202122
:)   # A tibble: 90 × 4
:)      ind      edu_college year  WFH_ratio
:)      <chr>    <chr>       <chr>     <dbl>
:)    1 건설업   대졸미만    2019      0    
:)    2 건설업   대졸미만    2020      0.226
:)    3 건설업   대졸미만    2021      0.224
:)    4 건설업   대졸이상    2019      0    
:)    5 건설업   대졸이상    2020      0.471
:)    6 건설업   대졸이상    2021      2.58 
:)    7 공공행정 대졸미만    2019      0    
:)    8 공공행정 대졸미만    2020      0.946
:)    9 공공행정 대졸미만    2021      3.58 
:)   10 공공행정 대졸이상    2019      0.303
:)   # ℹ 80 more rows

rm(wfh_status_19, wfh_status_20, wfh_status_21, wfh_status_192021)



4 representation of two modes

4.1 origin

4.1.1 import

kosis_ingu_edu_emd_2020 <- readxl::read_excel('data_residential/kosis_ingu_edu_emd_2020.xlsx') 

colnames(kosis_ingu_edu_emd_2020) <- c("adm_cd", "adm_nm", "nu1", "nu2", "nu3", "age", "nu4", "edu_level", 
                                       "nu5", "nu6", "nu7", "count")

kosis_ingu_edu_emd_2020 <- kosis_ingu_edu_emd_2020 %>%
  slice(-1) %>%
  select(adm_cd, adm_nm, age, edu_level, count) %>% 
  
  filter(str_sub(adm_cd, 1, 5) != "23320") %>%     # 23320이 옹진군임
  filter(as.numeric(adm_cd) > 0  & str_length(adm_cd) > 5) %>%
  filter(str_sub(adm_cd, 1, 2) %in% c("11", "23", "31")) %>%
  mutate(sgg_cd = str_sub(adm_cd, 1, 5)) %>%
  left_join(kosis_sgg, by = "sgg_cd") %>%
  select(-sgg_cd) %>%
  mutate(adm_nm = str_c(sgg_nm, "_", adm_nm))
kosis_ingu_edu_emd_2020
:)   # A tibble: 35,776 × 6
:)      adm_cd  adm_nm        age     edu_level                count sgg_nm
:)      <chr>   <chr>         <chr>   <chr>                    <chr> <chr> 
:)    1 1101053 종로구_사직동 20-29세 계                       1068  종로구
:)    2 1101053 종로구_사직동 20-29세 초등학교                 <NA>  종로구
:)    3 1101053 종로구_사직동 20-29세 중학교                   <NA>  종로구
:)    4 1101053 종로구_사직동 20-29세 고등학교                 118   종로구
:)    5 1101053 종로구_사직동 20-29세 대학교(2,3년제)          162   종로구
:)    6 1101053 종로구_사직동 20-29세 대학교(4년제 이상)       671   종로구
:)    7 1101053 종로구_사직동 20-29세 대학원(석박사 과정)      117   종로구
:)    8 1101053 종로구_사직동 20-29세 받지 않았음(미취학 포함) <NA>  종로구
:)    9 1101053 종로구_사직동 30-39세 계                       1266  종로구
:)   10 1101053 종로구_사직동 30-39세 초등학교                 <NA>  종로구
:)   # ℹ 35,766 more rows
kosis_ingu_edu_emd_2020 <- kosis_ingu_edu_emd_2020 %>%
  mutate(count = as.numeric(count)) %>%
  mutate(count = replace_na(count, 0)) %>%
  group_by(adm_cd, adm_nm, edu_level) %>%     # 나이는 모두 20~59세 사이로 한정함.
  summarise(count = sum(count)) %>%
  filter(edu_level %in% c("계", "대학교(4년제 이상)", "대학원(석박사 과정)")) %>%
  pivot_wider(names_from = edu_level, 
                values_from = count) %>%
  ungroup() %>%
  mutate(college = (`대학교(4년제 이상)` + `대학원(석박사 과정)`) / ``) %>%
  select(adm_cd, adm_nm, college, 계) %>%
  rename(pop = 계) %>%
  ungroup()
kosis_ingu_edu_emd_2020
:)   # A tibble: 1,118 × 4
:)      adm_cd  adm_nm                 college   pop
:)      <chr>   <chr>                    <dbl> <dbl>
:)    1 1101053 종로구_사직동            0.808  5034
:)    2 1101054 종로구_삼청동            0.706  1328
:)    3 1101055 종로구_부암동            0.708  5471
:)    4 1101056 종로구_평창동            0.779  9791
:)    5 1101057 종로구_무악동            0.780  4532
:)    6 1101058 종로구_교남동            0.675  6043
:)    7 1101060 종로구_가회동            0.603  2300
:)    8 1101061 종로구_종로1·2·3·4가동   0.682  3543
:)    9 1101063 종로구_종로5·6가동       0.537  3163
:)   10 1101064 종로구_이화동            0.645  5230
:)   # ℹ 1,108 more rows
kosis_ingu_edu_emd_2020 <- kosis_ingu_edu_emd_2020 %>%
  mutate(adm_cd = replace(adm_cd, adm_nm == "구로구_오류2동", "1117068"),     # 생활이동데이터에 기반한 멤버십에 항동이 따로 없으므로 이 단계에서는 필수적으로 이렇게 해야 함.
         adm_cd = replace(adm_cd, adm_nm == "구로구_항동", "1117068"),
         adm_nm = replace(adm_nm, adm_nm == "구로구_항동", "구로구_오류2동")) %>%
  mutate(adm_nm = stringr::str_replace_all(adm_nm, "·", ".")) %>%

  group_by(adm_nm, adm_cd) %>%
  summarise(college = mean(college)) %>%
  ungroup() %>%
  arrange(adm_cd) 
kosis_ingu_edu_emd_2020
:)   # A tibble: 1,117 × 3
:)      adm_nm                 adm_cd  college
:)      <chr>                  <chr>     <dbl>
:)    1 종로구_사직동          1101053   0.808
:)    2 종로구_삼청동          1101054   0.706
:)    3 종로구_부암동          1101055   0.708
:)    4 종로구_평창동          1101056   0.779
:)    5 종로구_무악동          1101057   0.780
:)    6 종로구_교남동          1101058   0.675
:)    7 종로구_가회동          1101060   0.603
:)    8 종로구_종로1.2.3.4가동 1101061   0.682
:)    9 종로구_종로5.6가동     1101063   0.537
:)   10 종로구_이화동          1101064   0.645
:)   # ℹ 1,107 more rows
colSums(is.na(kosis_ingu_edu_emd_2020))
:)    adm_nm  adm_cd college 
:)         0       0       0

4.1.2 join to sf

4.1.2.1 matched

dong.sf_resid_tb_1 <- dong.sf %>%
    st_drop_geometry() %>%
    left_join(kosis_ingu_edu_emd_2020, by = "adm_nm") %>%       # 코드 다른 게 있을까봐 이름만을 기준으로로    
    filter(!is.na(college))                                     # 다행히도 안 중요한 지역들이 매칭 안됨.
dong.sf_resid_tb_1 
:)   # A tibble: 1,116 × 4
:)      adm_nm                 adm_cd.x adm_cd.y college
:)      <chr>                  <chr>    <chr>      <dbl>
:)    1 종로구_사직동          1101053  1101053    0.808
:)    2 종로구_삼청동          1101054  1101054    0.706
:)    3 종로구_부암동          1101055  1101055    0.708
:)    4 종로구_평창동          1101056  1101056    0.779
:)    5 종로구_무악동          1101057  1101057    0.780
:)    6 종로구_교남동          1101058  1101058    0.675
:)    7 종로구_가회동          1101060  1101060    0.603
:)    8 종로구_종로1.2.3.4가동 1101061  1101061    0.682
:)    9 종로구_종로5.6가동     1101063  1101063    0.537
:)   10 종로구_이화동          1101064  1101064    0.645
:)   # ℹ 1,106 more rows

4.1.2.2 unmatched(imputation)

dong.sf_resid_tb_2 <- dong.sf %>%
    st_drop_geometry() %>%
    left_join(kosis_ingu_edu_emd_2020, by = "adm_nm") %>%       # 코드 다른 게 있을까봐 이름만을 기준으로로    
    filter(is.na(college))                                      # 다행히도 안 중요한 지역들이 매칭 안됨.
dong.sf_resid_tb_2
:)   # A tibble: 7 × 4
:)     adm_nm         adm_cd.x adm_cd.y college
:)     <chr>          <chr>    <chr>      <dbl>
:)   1 파주시_진동면  3120021  <NA>          NA
:)   2 파주시_장단면  3120026  <NA>          NA
:)   3 파주시_진서면  3120027  <NA>          NA
:)   4 광주시_쌍령동  3125056  <NA>          NA
:)   5 광주시_탄벌동  3125057  <NA>          NA
:)   6 광주시_광남1동 3125058  <NA>          NA
:)   7 광주시_광남2동 3125059  <NA>          NA
mean_paju <- kosis_ingu_edu_emd_2020 %>%
  filter(str_sub(adm_nm, 1, 3) == "파주시") %>%
  summarise(college = mean(college)) %>%
  as.numeric()
mean_paju
:)   [1] 0.326
mean_gwangju <- kosis_ingu_edu_emd_2020 %>%
  filter(str_sub(adm_nm, 1, 3) == "광주시") %>%
  summarise(college = mean(college)) %>%
  as.numeric()
mean_gwangju
:)   [1] 0.327
dong.sf_resid_tb_2 <- dong.sf_resid_tb_2 %>%
  mutate(college = ifelse(str_sub(adm_nm, 1, 3) == "파주시", mean_paju, mean_gwangju))
dong.sf_resid_tb_2
:)   # A tibble: 7 × 4
:)     adm_nm         adm_cd.x adm_cd.y college
:)     <chr>          <chr>    <chr>      <dbl>
:)   1 파주시_진동면  3120021  <NA>       0.326
:)   2 파주시_장단면  3120026  <NA>       0.326
:)   3 파주시_진서면  3120027  <NA>       0.326
:)   4 광주시_쌍령동  3125056  <NA>       0.327
:)   5 광주시_탄벌동  3125057  <NA>       0.327
:)   6 광주시_광남1동 3125058  <NA>       0.327
:)   7 광주시_광남2동 3125059  <NA>       0.327

4.1.2.3 integrate

dong.sf_resid_tb <- dong.sf_resid_tb_1 %>%
  bind_rows(dong.sf_resid_tb_2)
dong.sf_resid_tb
:)   # A tibble: 1,123 × 4
:)      adm_nm                 adm_cd.x adm_cd.y college
:)      <chr>                  <chr>    <chr>      <dbl>
:)    1 종로구_사직동          1101053  1101053    0.808
:)    2 종로구_삼청동          1101054  1101054    0.706
:)    3 종로구_부암동          1101055  1101055    0.708
:)    4 종로구_평창동          1101056  1101056    0.779
:)    5 종로구_무악동          1101057  1101057    0.780
:)    6 종로구_교남동          1101058  1101058    0.675
:)    7 종로구_가회동          1101060  1101060    0.603
:)    8 종로구_종로1.2.3.4가동 1101061  1101061    0.682
:)    9 종로구_종로5.6가동     1101063  1101063    0.537
:)   10 종로구_이화동          1101064  1101064    0.645
:)   # ℹ 1,113 more rows
colSums(is.na(dong.sf_resid_tb))
:)     adm_nm adm_cd.x adm_cd.y  college 
:)          0        0        7        0
dong.sf_resid_tb %>%
  filter(is.na(adm_cd.y) | adm_cd.x != adm_cd.y)
:)   # A tibble: 9 × 4
:)     adm_nm         adm_cd.x adm_cd.y college
:)     <chr>          <chr>    <chr>      <dbl>
:)   1 광주시_경안동  3125054  3125051    0.319
:)   2 광주시_송정동  3125055  3125052    0.305
:)   3 파주시_진동면  3120021  <NA>       0.326
:)   4 파주시_장단면  3120026  <NA>       0.326
:)   5 파주시_진서면  3120027  <NA>       0.326
:)   6 광주시_쌍령동  3125056  <NA>       0.327
:)   7 광주시_탄벌동  3125057  <NA>       0.327
:)   8 광주시_광남1동 3125058  <NA>       0.327
:)   9 광주시_광남2동 3125059  <NA>       0.327
dong.sf_resid_tb <- dong.sf_resid_tb %>%
  select(-adm_cd.y) %>%
  rename(adm_cd = adm_cd.x)
dong.sf_resid_tb
:)   # A tibble: 1,123 × 3
:)      adm_nm                 adm_cd  college
:)      <chr>                  <chr>     <dbl>
:)    1 종로구_사직동          1101053   0.808
:)    2 종로구_삼청동          1101054   0.706
:)    3 종로구_부암동          1101055   0.708
:)    4 종로구_평창동          1101056   0.779
:)    5 종로구_무악동          1101057   0.780
:)    6 종로구_교남동          1101058   0.675
:)    7 종로구_가회동          1101060   0.603
:)    8 종로구_종로1.2.3.4가동 1101061   0.682
:)    9 종로구_종로5.6가동     1101063   0.537
:)   10 종로구_이화동          1101064   0.645
:)   # ℹ 1,113 more rows
dong.sf_resid <- dong.sf %>%
  left_join(dong.sf_resid_tb, by =  c("adm_nm", "adm_cd"))      # 여기서는 코드까지 맞춰도 다 맞음
dong.sf_resid
:)   Simple feature collection with 1123 features and 3 fields
:)   Geometry type: GEOMETRY
:)   Dimension:     XY
:)   Bounding box:  xmin: 865000 ymin: 1880000 xmax: 1030000 ymax: 2030000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 1,123 × 4
:)      adm_nm                 adm_cd                                                                                 geometry college
:)      <chr>                  <chr>                                                                        <MULTIPOLYGON [m]>   <dbl>
:)    1 종로구_사직동          1101053 (((953554 1953336, 953555 1953320, 953556 1953307, 953557 1953295, 953558 1953281, 9...   0.808
:)    2 종로구_삼청동          1101054 (((953844 1955492, 953859 1955490, 953902 1955493, 953912 1955493, 953916 1955492, 9...   0.706
:)    3 종로구_부암동          1101055 (((952490 1956549, 952498 1956533, 952501 1956525, 952501 1956524, 952492 1956515, 9...   0.708
:)    4 종로구_평창동          1101056 (((953684 1959210, 953665 1959132, 953647 1959057, 953651 1959043, 953672 1958971, 9...   0.779
:)    5 종로구_무악동          1101057 (((952298 1953540, 952325 1953508, 952329 1953500, 952338 1953484, 952339 1953482, 9...   0.780
:)    6 종로구_교남동          1101058 (((952572 1953259, 952573 1953256, 952575 1953250, 952577 1953241, 952580 1953234, 9...   0.675
:)    7 종로구_가회동          1101060 (((954895 1954615, 954888 1954592, 954865 1954592, 954856 1954592, 954838 1954563, 9...   0.603
:)    8 종로구_종로1.2.3.4가동 1101061 (((954918 1954372, 954926 1954362, 954932 1954355, 954937 1954352, 954949 1954346, 9...   0.682
:)    9 종로구_종로5.6가동     1101063 (((956607 1953150, 956607 1953148, 956607 1953146, 956607 1953144, 956607 1953139, 9...   0.537
:)   10 종로구_이화동          1101064 (((956366 1954112, 956372 1954108, 956379 1954108, 956379 1954108, 956408 1954108, 9...   0.645
:)   # ℹ 1,113 more rows
colSums(is.na(dong.sf_resid))
:)     adm_nm   adm_cd geometry  college 
:)          0        0        0        0
dong.sf_resid_tb <- dong.sf_resid %>%
  st_drop_geometry()

rm(dong.sf_resid_tb_1, dong.sf_resid_tb_2)

4.1.3 figure 4a

temp <- dong.sf_resid %>%
  st_geometry() %>%
  st_intersects()
temp
:)   Sparse geometry binary predicate list of length 1123, where the predicate was `intersects'
:)   first 10 elements:
:)    1: 1, 5, 6, 8, 16, 18, 20
:)    2: 2, 3, 7, 8, 16, 17, 126
:)    3: 2, 3, 4, 16, 126, 195, 197
:)    4: 3, 4, 117, 118, 126, 178, 189, 197, 801
:)    5: 1, 5, 6, 16, 193, 194, 196
:)    6: 1, 5, 6, 18, 193, 203
:)    7: 2, 7, 8, 17
:)    8: 1, 2, 7, 8, 9, 10, 16, 17, 20, 24
:)    9: 8, 9, 10, 11, 12, 23, 24, 28
:)    10: 8, 9, 10, 12, 17, 127
a <- c()
for (i in 1:1123) {
  len <- length(temp[[i]])  
  a[i] <- len}
a
:)    [1]  7  7  7  9  7  6  4 10  8  6  8  6  5  6  6  8  7  7  8  7  8  8  6  6  6  7  6 10  7  6  6  8  5  9  7  7  6  5  8  6  6  5 10  5  7  7  8 10  7  6  7  7  5  7  7  7  8  7  5  7  9  8  7  7  8  9  5  7  5  6  8  8  7  7  7
:)    [ reached getOption("max.print") -- omitted 1048 entries ]
min(a)
:)   [1] 1
dong.sf_resid$num_of_neighbors <- a 
dong.sf_resid
:)   Simple feature collection with 1123 features and 4 fields
:)   Geometry type: GEOMETRY
:)   Dimension:     XY
:)   Bounding box:  xmin: 865000 ymin: 1880000 xmax: 1030000 ymax: 2030000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 1,123 × 5
:)      adm_nm                 adm_cd                                                                                 geometry college num_of_neighbors
:)    * <chr>                  <chr>                                                                        <MULTIPOLYGON [m]>   <dbl>            <int>
:)    1 종로구_사직동          1101053 (((953554 1953336, 953555 1953320, 953556 1953307, 953557 1953295, 953558 1953281, 9...   0.808                7
:)    2 종로구_삼청동          1101054 (((953844 1955492, 953859 1955490, 953902 1955493, 953912 1955493, 953916 1955492, 9...   0.706                7
:)    3 종로구_부암동          1101055 (((952490 1956549, 952498 1956533, 952501 1956525, 952501 1956524, 952492 1956515, 9...   0.708                7
:)    4 종로구_평창동          1101056 (((953684 1959210, 953665 1959132, 953647 1959057, 953651 1959043, 953672 1958971, 9...   0.779                9
:)    5 종로구_무악동          1101057 (((952298 1953540, 952325 1953508, 952329 1953500, 952338 1953484, 952339 1953482, 9...   0.780                7
:)    6 종로구_교남동          1101058 (((952572 1953259, 952573 1953256, 952575 1953250, 952577 1953241, 952580 1953234, 9...   0.675                6
:)    7 종로구_가회동          1101060 (((954895 1954615, 954888 1954592, 954865 1954592, 954856 1954592, 954838 1954563, 9...   0.603                4
:)    8 종로구_종로1.2.3.4가동 1101061 (((954918 1954372, 954926 1954362, 954932 1954355, 954937 1954352, 954949 1954346, 9...   0.682               10
:)    9 종로구_종로5.6가동     1101063 (((956607 1953150, 956607 1953148, 956607 1953146, 956607 1953144, 956607 1953139, 9...   0.537                8
:)   10 종로구_이화동          1101064 (((956366 1954112, 956372 1954108, 956379 1954108, 956379 1954108, 956408 1954108, 9...   0.645                6
:)   # ℹ 1,113 more rows

4.1.4 stat

summary(dong.sf_resid_tb$college)
:)      Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
:)     0.144   0.314   0.436   0.464   0.597   0.946
sd(dong.sf_resid_tb$college)
:)   [1] 0.178

4.2 destination

4.2.1 import

# 여기도 항동 없음
jongsaja <- readxl::read_xls("data_industrial/economic_activity_survey_by_industry.xls")

colnames(jongsaja) <- c("sgg_nm", "adm_nm", "total_employ", 
                        '농어업', '광업', '제조업', '전기가스업', '수도하수업', '건설업', '도소매업', '운수창고업', '숙박음식업', '정보통신업', '금융보험업', '부동산업', '전문과학기술업', '사업지원업', '공공행정', '교육', '보건복지', '예술스포츠여가', '협회및개인서비스') 
jongsaja <- jongsaja %>%
    slice(-1) %>%
    filter(adm_nm != "소계") %>%
    mutate_at(4:22, as.numeric) %>%
    mutate_at(4:22, tidyr::replace_na, 0) %>%
    mutate(adm_nm = stringr::str_replace_all(adm_nm, "·", "."),
           adm_nm = replace(adm_nm, adm_nm=="여의도동", "여의동")) %>%
    mutate(adm_nm = str_c(sgg_nm, "_", adm_nm)) %>%
    select(-c(sgg_nm, total_employ)) 

colSums(is.na(jongsaja))
:)             adm_nm           농어업             광업           제조업       전기가스업       수도하수업           건설업         도소매업       운수창고업       숙박음식업       정보통신업       금융보험업         부동산업   전문과학기술업       사업지원업         공공행정             교육         보건복지   예술스포츠여가 협회및개인서비스 
:)                  0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0
jongsaja %>%
    as.data.frame()
:)            adm_nm 농어업 광업 제조업 전기가스업 수도하수업 건설업 도소매업 운수창고업 숙박음식업 정보통신업 금융보험업 부동산업 전문과학기술업 사업지원업 공공행정 교육 보건복지 예술스포츠여가 협회및개인서비스
:)   1 종로구_사직동     12    2    359         11         34    688     2726       2513       4124       2133       7794     1091           8098       7408     6088  786     1096           1653              906
:)   2 종로구_삼청동      0    0     40          0          0     57      910         93        874         76         63       35            348        108     1291  297       30            185              194
:)   3 종로구_부암동      0    1    158          0          0     76      550         39        394        126         29       91            110         73       60 1190      183             49              325
:)    [ reached 'max' / getOption("max.print") -- omitted 421 rows ]

4.2.2 aggregate by community

b <- dong.sf %>%
    filter(str_sub(adm_cd, 1, 2) == "11") %>%
    st_drop_geometry() %>%
    select(adm_cd, adm_nm)

jongsaja %>%
    left_join(b, by = c("adm_nm" = "adm_nm")) %>%
    relocate(adm_cd, adm_nm) %>%
  is.na() %>%
  colSums()
:)             adm_cd           adm_nm           농어업             광업           제조업       전기가스업       수도하수업           건설업         도소매업       운수창고업       숙박음식업       정보통신업       금융보험업         부동산업   전문과학기술업       사업지원업         공공행정             교육         보건복지   예술스포츠여가 협회및개인서비스 
:)                  0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0                0
jongsaja_comm <- jongsaja %>%
    left_join(b, by = "adm_nm") %>%
    relocate(adm_cd, adm_nm) %>%
    left_join(membership_info_eng, by = "adm_cd") %>%
    relocate(adm_cd, member, name) %>%
    group_by(name) %>%
    summarise(across(is.numeric, ~ sum(.x, na.rm = TRUE))) %>%
    select(-c("건설업", "농어업", "광업", "수도하수업", "전기가스업"))           #건설업 집계 못 믿을만함
jongsaja_comm
:)   # A tibble: 54 × 15
:)      name       제조업 도소매업 운수창고업 숙박음식업 정보통신업 금융보험업 부동산업 전문과학기술업 사업지원업 공공행정  교육 보건복지 예술스포츠여가 협회및개인서비스
:)      <chr>       <dbl>    <dbl>      <dbl>      <dbl>      <dbl>      <dbl>    <dbl>          <dbl>      <dbl>    <dbl> <dbl>    <dbl>          <dbl>            <dbl>
:)    1 Anam         3079     9266       3677       9318       1131       1057     2984           2209       3913     2916 16613     9693           1628             3562
:)    2 Balsan       2733    10981       5718       6454       1026       1369     1579           1670       3434     2375  6081     8513           1386             2901
:)    3 Bangbae      1730    14864       3598       9023       5486       4439     4306           7100      15193     1055  6795     5852           1712             4631
:)    4 Banghak      3657    10353       8533       7833        645       1086     2678           1689       2019     3840  6207    11065           1677             4260
:)    5 Bangi        2631    10265       1819       6173       2125        992     2141           3624       9561     1119  7238    11880           2185             2045
:)    6 Banpo         840    11117       2459       7518       2434       1076     2825           4530       4071      674  5285     6986           2309             1906
:)    7 Changshin    6232    15381       1587       3928       1920       2773     1159           1251       2170      531   938     1565            519             1484
:)    8 Cheongdam    6649    35218       2281      16725      12338       5342     7477          25307      16321     3234  5466     8937           1994             8601
:)    9 Chunghyeon   1002     4467       1088       2405       2293       5324      745           3589       3625     3030  2323     1035            263             1411
:)   10 Daechi       3914    23571       2861       9480       9407       9381     6669          15237      12570      469 10135     7578           1870             3491
:)   # ℹ 44 more rows

Manufacturing (MF) Wholesale and retail trade (WR) Transportation and storage (TS) Accommodation and food service activities (AF) Information and communication (IC) Financial and insurance activities (FI) Real estate activities (RE) Professional, scientific and technical activities (PT) Business facilities management and business support services; rental and leasing activities (BF) Public administration and defence; compulsory social security (PA) Education (EC) Human health and social work activities (HS) Arts, sports and recreation related services (RS) Membership organizations, repair and other personal services (PS)

colnames(jongsaja_comm) <- c("name", "MF", "WR", "TS", "AF", "IC", "FI", "RE", "PT", "BF", "PA", "EC", "HS", "RS", "PS")
jongsaja_comm %>%
    write_csv("data_industrial/jongsaja_comm_20230823.csv")
jongsaja_comm <- read_csv("data_industrial/jongsaja_comm_20230823.csv")
jongsaja_comm
:)   # A tibble: 54 × 15
:)      name          MF    WR    TS    AF    IC    FI    RE    PT    BF    PA    EC    HS    RS    PS
:)      <chr>      <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:)    1 Anam        3079  9266  3677  9318  1131  1057  2984  2209  3913  2916 16613  9693  1628  3562
:)    2 Balsan      2733 10981  5718  6454  1026  1369  1579  1670  3434  2375  6081  8513  1386  2901
:)    3 Bangbae     1730 14864  3598  9023  5486  4439  4306  7100 15193  1055  6795  5852  1712  4631
:)    4 Banghak     3657 10353  8533  7833   645  1086  2678  1689  2019  3840  6207 11065  1677  4260
:)    5 Bangi       2631 10265  1819  6173  2125   992  2141  3624  9561  1119  7238 11880  2185  2045
:)    6 Banpo        840 11117  2459  7518  2434  1076  2825  4530  4071   674  5285  6986  2309  1906
:)    7 Changshin   6232 15381  1587  3928  1920  2773  1159  1251  2170   531   938  1565   519  1484
:)    8 Cheongdam   6649 35218  2281 16725 12338  5342  7477 25307 16321  3234  5466  8937  1994  8601
:)    9 Chunghyeon  1002  4467  1088  2405  2293  5324   745  3589  3625  3030  2323  1035   263  1411
:)   10 Daechi      3914 23571  2861  9480  9407  9381  6669 15237 12570   469 10135  7578  1870  3491
:)   # ℹ 44 more rows

4.2.3 filter out using threshold

4.2.3.1 employment numbers and density of each employment district

jongsaja_comm_rowsums <- jongsaja_comm %>%
    column_to_rownames("name") %>%
    as.data.frame() %>%
    rowSums() 

jongsaja_comm_rowsums <- cbind(jongsaja_comm_rowsums, jongsaja_comm$name) %>%
    as_tibble() 

colnames(jongsaja_comm_rowsums) <- c("total_emp", "name")
jongsaja_comm_rowsums <- jongsaja_comm_rowsums %>%
    mutate(total_emp = as.numeric(total_emp))
jongsaja_comm_rowsums
:)   # A tibble: 54 × 2
:)      total_emp name      
:)          <dbl> <chr>     
:)    1     71046 Anam      
:)    2     56220 Balsan    
:)    3     85784 Bangbae   
:)    4     65542 Banghak   
:)    5     63798 Bangi     
:)    6     54030 Banpo     
:)    7     41438 Changshin 
:)    8    155890 Cheongdam 
:)    9     32600 Chunghyeon
:)   10    116633 Daechi    
:)   # ℹ 44 more rows
dong.sf_commune <- dong.sf_commune %>%
    left_join(jongsaja_comm_rowsums, by = "name") %>%
    mutate(density_emp = total_emp / as.numeric(area) * 10000) %>%        # 1m^2가 아닌 100m^2 당 고용인구
    mutate(log_total_emp = log(total_emp),
           log_density_emp = log(density_emp))
dong.sf_commune
:)   Simple feature collection with 54 features and 6 fields
:)   Geometry type: GEOMETRY
:)   Dimension:     XY
:)   Bounding box:  xmin: 935000 ymin: 1940000 xmax: 972000 ymax: 1970000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 54 × 7
:)      name                                                                                           geometry      area total_emp density_emp log_total_emp log_density_emp
:)    * <chr>                                                                                     <POLYGON [m]>     [m^2]     <dbl>       <dbl>         <dbl>           <dbl>
:)    1 Anam       ((957987 1953401, 957976 1953392, 957958 1953376, 957944 1953364, 957936 1953357, 957909 ... 15774236.     71046        45.0          11.2            3.81
:)    2 Balsan     ((941113 1947544, 941103 1947542, 941103 1947542, 941062 1947534, 941061 1947533, 941057 ...  8854094.     56220        63.5          10.9            4.15
:)    3 Bangbae    ((954771 1940006, 954755 1939998, 954750 1939996, 954733 1939987, 954711 1939975, 954653 ... 15289435.     85784        56.1          11.4            4.03
:)    4 Banghak    ((959860 1959717, 959857 1959710, 959854 1959705, 959858 1959697, 959852 1959692, 959853 ... 19514974.     65542        33.6          11.1            3.51
:)    5 Bangi      ((968192 1945526, 968204 1945515, 968204 1945515, 968210 1945508, 968211 1945506, 968211 ... 10361966.     63798        61.6          11.1            4.12
:)    6 Banpo      ((956728 1945168, 956728 1945168, 956729 1945167, 956729 1945166, 956730 1945165, 956730 ...  6799150.     54030        79.5          10.9            4.38
:)    7 Changshin  ((956717 1952379, 956714 1952379, 956710 1952379, 956708 1952379, 956703 1952379, 956701 ...  1972479.     41438       210.           10.6            5.35
:)    8 Cheongdam  ((960137 1945124, 960129 1945145, 960128 1945146, 960111 1945193, 960109 1945199, 960100 ...  7369461.    155890       212.           12.0            5.35
:)    9 Chunghyeon ((953195 1951524, 953196 1951519, 953197 1951516, 953199 1951516, 953206 1951503, 953207 ...  2057803.     32600       158.           10.4            5.07
:)   10 Daechi     ((961579 1941234, 961557 1941223, 961545 1941217, 961524 1941214, 961492 1941208, 961476 ... 10038396.    116633       116.           11.7            4.76
:)   # ℹ 44 more rows
top_dest <- read_csv("data_industrial/data_topdestinations_20230608.csv") %>%
  mutate(W_commune_nm = ifelse(W_commune_nm == "Myeong", "Myeongdong", W_commune_nm))
top_dest
:)   # A tibble: 54 × 2
:)      W_commune_nm total_inflow
:)      <chr>               <dbl>
:)    1 Yeoksam          2447083.
:)    2 Guro             2382391.
:)    3 Myeongdong       2325144.
:)    4 Jongno           1775598.
:)    5 Yeoui            1738367.
:)    6 Cheongdam        1519280.
:)    7 Seongsu          1363683.
:)    8 Yeongdeungpo     1296826.
:)    9 Sinchon          1278512.
:)   10 Daechi           1217771.
:)   # ℹ 44 more rows
dong.sf_commune <- dong.sf_commune %>%
  left_join(top_dest, by = c("name" = "W_commune_nm")) 
dong.sf_commune 
:)   Simple feature collection with 54 features and 7 fields
:)   Geometry type: GEOMETRY
:)   Dimension:     XY
:)   Bounding box:  xmin: 935000 ymin: 1940000 xmax: 972000 ymax: 1970000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 54 × 8
:)      name                                                                                           geometry      area total_emp density_emp log_total_emp log_density_emp total_inflow
:)      <chr>                                                                                     <POLYGON [m]>     [m^2]     <dbl>       <dbl>         <dbl>           <dbl>        <dbl>
:)    1 Anam       ((957987 1953401, 957976 1953392, 957958 1953376, 957944 1953364, 957936 1953357, 957909 ... 15774236.     71046        45.0          11.2            3.81      536318.
:)    2 Balsan     ((941113 1947544, 941103 1947542, 941103 1947542, 941062 1947534, 941061 1947533, 941057 ...  8854094.     56220        63.5          10.9            4.15      337574.
:)    3 Bangbae    ((954771 1940006, 954755 1939998, 954750 1939996, 954733 1939987, 954711 1939975, 954653 ... 15289435.     85784        56.1          11.4            4.03      594415.
:)    4 Banghak    ((959860 1959717, 959857 1959710, 959854 1959705, 959858 1959697, 959852 1959692, 959853 ... 19514974.     65542        33.6          11.1            3.51      386032.
:)    5 Bangi      ((968192 1945526, 968204 1945515, 968204 1945515, 968210 1945508, 968211 1945506, 968211 ... 10361966.     63798        61.6          11.1            4.12      534683.
:)    6 Banpo      ((956728 1945168, 956728 1945168, 956729 1945167, 956729 1945166, 956730 1945165, 956730 ...  6799150.     54030        79.5          10.9            4.38      459624.
:)    7 Changshin  ((956717 1952379, 956714 1952379, 956710 1952379, 956708 1952379, 956703 1952379, 956701 ...  1972479.     41438       210.           10.6            5.35      380417.
:)    8 Cheongdam  ((960137 1945124, 960129 1945145, 960128 1945146, 960111 1945193, 960109 1945199, 960100 ...  7369461.    155890       212.           12.0            5.35     1519280.
:)    9 Chunghyeon ((953195 1951524, 953196 1951519, 953197 1951516, 953199 1951516, 953206 1951503, 953207 ...  2057803.     32600       158.           10.4            5.07      917891.
:)   10 Daechi     ((961579 1941234, 961557 1941223, 961545 1941217, 961524 1941214, 961492 1941208, 961476 ... 10038396.    116633       116.           11.7            4.76     1217771.
:)   # ℹ 44 more rows

4.2.3.2 filter out

THRESHOLD = 0.73
#THRESHOLD = 0.5
#THRESHOLD = 0.8
#THRESHOLD = 0.9

dong.sf_commune_filtered <- dong.sf_commune %>%
  filter(total_inflow >= quantile(total_inflow, THRESHOLD))              # 이런 거 조심!!!!
dong.sf_commune_filtered %>%
  relocate(name, total_inflow, total_emp, density_emp) %>%
  arrange(desc(total_inflow))
:)   Simple feature collection with 15 features and 7 fields
:)   Geometry type: GEOMETRY
:)   Dimension:     XY
:)   Bounding box:  xmin: 944000 ymin: 1940000 xmax: 970000 ymax: 1960000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 15 × 8
:)      name         total_inflow total_emp density_emp                                                                                             geometry      area log_total_emp log_density_emp
:)      <chr>               <dbl>     <dbl>       <dbl>                                                                                       <GEOMETRY [m]>     [m^2]         <dbl>           <dbl>
:)    1 Yeoksam          2447083.    214306       479.  POLYGON ((958823 1942849, 958802 1942857, 958771 1942870, 958771 1942868, 958777 1942838, 958776 ...  4469436.          12.3            6.17
:)    2 Guro             2382391.    336770       304.  POLYGON ((946589 1942280, 946594 1942258, 946602 1942228, 946607 1942205, 946608 1942204, 946611 ... 11092136.          12.7            5.72
:)    3 Myeongdong       2325144.    217113       773.  POLYGON ((954497 1950831, 954497 1950820, 954502 1950815, 954506 1950812, 954515 1950804, 954511 ...  2809133.          12.3            6.65
:)    4 Jongno           1775598.    129040       203.  POLYGON ((955802 1953069, 955799 1953064, 955798 1953061, 955797 1953059, 955795 1953054, 955794 ...  6346426.          11.8            5.31
:)    5 Yeoui            1738367.    151575       180.  MULTIPOLYGON (((949183 1949093, 949187 1949091, 949190 1949094, 949206 1949086, 949211 1949085, 9...  8431326.          11.9            5.19
:)    6 Cheongdam        1519280.    155890       212.  POLYGON ((960137 1945124, 960129 1945145, 960128 1945146, 960111 1945193, 960109 1945199, 960100 ...  7369461.          12.0            5.35
:)    7 Seongsu          1363683.    155725       113.  POLYGON ((961365 1949039, 961352 1949009, 961351 1949007, 961348 1948999, 961341 1948985, 961338 ... 13747639.          12.0            4.73
:)    8 Yeongdeungpo     1296826.    154833       166.  GEOMETRYCOLLECTION (POLYGON ((947987 1947559, 948020 1947495, 948038 1947459, 948052 1947433, 948...  9328081.          12.0            5.11
:)    9 Sinchon          1278512.    152826       105.  POLYGON ((948402 1949282, 948351 1949283, 947902 1949285, 947774 1949286, 947725 1949286, 947714 ... 14590732.          11.9            4.65
:)   10 Daechi           1217771.    116633       116.  POLYGON ((961579 1941234, 961557 1941223, 961545 1941217, 961524 1941214, 961492 1941208, 961476 ... 10038396.          11.7            4.76
:)   11 Gwanghui         1212202.    159615       239.  POLYGON ((957266 1950489, 957262 1950476, 957249 1950474, 957245 1950468, 957219 1950435, 957203 ...  6689535.          12.0            5.47
:)   12 Samseong         1198605.    120988       320.  POLYGON ((962184 1944598, 962175 1944598, 962166 1944598, 962183 1944573, 962189 1944556, 962190 ...  3779139.          11.7            5.77
:)   13 Munjeong         1019500.    110472        98.5 POLYGON ((970042 1944496, 970040 1944493, 970036 1944485, 970036 1944484, 970036 1944481, 970036 ... 11209772.          11.6            4.59
:)   14 Jamsil           1016366.    123030       102.  POLYGON ((966155 1945440, 966223 1945405, 966274 1945378, 966281 1945375, 966285 1945373, 966285 ... 12092064.          11.7            4.62
:)   15 Seocho            988172.    103264       236.  POLYGON ((956921 1944435, 956929 1944385, 956930 1944376, 956935 1944348, 956941 1944319, 956948 ...  4377955.          11.5            5.46
dong.sf_commune %>%
  filter(total_emp >= quantile(total_emp, THRESHOLD)) %>%
  relocate(name, total_inflow, total_emp, density_emp) %>%
  arrange(desc(total_emp))
:)   Simple feature collection with 15 features and 7 fields
:)   Geometry type: GEOMETRY
:)   Dimension:     XY
:)   Bounding box:  xmin: 944000 ymin: 1940000 xmax: 970000 ymax: 1960000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 15 × 8
:)      name         total_inflow total_emp density_emp                                                                                             geometry      area log_total_emp log_density_emp
:)      <chr>               <dbl>     <dbl>       <dbl>                                                                                       <GEOMETRY [m]>     [m^2]         <dbl>           <dbl>
:)    1 Guro             2382391.    336770       304.  POLYGON ((946589 1942280, 946594 1942258, 946602 1942228, 946607 1942205, 946608 1942204, 946611 ... 11092136.          12.7            5.72
:)    2 Myeongdong       2325144.    217113       773.  POLYGON ((954497 1950831, 954497 1950820, 954502 1950815, 954506 1950812, 954515 1950804, 954511 ...  2809133.          12.3            6.65
:)    3 Yeoksam          2447083.    214306       479.  POLYGON ((958823 1942849, 958802 1942857, 958771 1942870, 958771 1942868, 958777 1942838, 958776 ...  4469436.          12.3            6.17
:)    4 Gwanghui         1212202.    159615       239.  POLYGON ((957266 1950489, 957262 1950476, 957249 1950474, 957245 1950468, 957219 1950435, 957203 ...  6689535.          12.0            5.47
:)    5 Cheongdam        1519280.    155890       212.  POLYGON ((960137 1945124, 960129 1945145, 960128 1945146, 960111 1945193, 960109 1945199, 960100 ...  7369461.          12.0            5.35
:)    6 Seongsu          1363683.    155725       113.  POLYGON ((961365 1949039, 961352 1949009, 961351 1949007, 961348 1948999, 961341 1948985, 961338 ... 13747639.          12.0            4.73
:)    7 Yeongdeungpo     1296826.    154833       166.  GEOMETRYCOLLECTION (POLYGON ((947987 1947559, 948020 1947495, 948038 1947459, 948052 1947433, 948...  9328081.          12.0            5.11
:)    8 Sinchon          1278512.    152826       105.  POLYGON ((948402 1949282, 948351 1949283, 947902 1949285, 947774 1949286, 947725 1949286, 947714 ... 14590732.          11.9            4.65
:)    9 Yeoui            1738367.    151575       180.  MULTIPOLYGON (((949183 1949093, 949187 1949091, 949190 1949094, 949206 1949086, 949211 1949085, 9...  8431326.          11.9            5.19
:)   10 Jongno           1775598.    129040       203.  POLYGON ((955802 1953069, 955799 1953064, 955798 1953061, 955797 1953059, 955795 1953054, 955794 ...  6346426.          11.8            5.31
:)   11 Jamsil           1016366.    123030       102.  POLYGON ((966155 1945440, 966223 1945405, 966274 1945378, 966281 1945375, 966285 1945373, 966285 ... 12092064.          11.7            4.62
:)   12 Yongsin           891353.    122657        90.2 POLYGON ((962199 1951279, 962184 1951259, 962177 1951250, 962176 1951250, 962174 1951246, 962166 ... 13591954.          11.7            4.50
:)   13 Samseong         1198605.    120988       320.  POLYGON ((962184 1944598, 962175 1944598, 962166 1944598, 962183 1944573, 962189 1944556, 962190 ...  3779139.          11.7            5.77
:)   14 Daechi           1217771.    116633       116.  POLYGON ((961579 1941234, 961557 1941223, 961545 1941217, 961524 1941214, 961492 1941208, 961476 ... 10038396.          11.7            4.76
:)   15 Munjeong         1019500.    110472        98.5 POLYGON ((970042 1944496, 970040 1944493, 970036 1944485, 970036 1944484, 970036 1944481, 970036 ... 11209772.          11.6            4.59

4.2.4 biplot of compositional data

4.2.4.1 set up for compositional data

jongsaja_comm_filtered <- jongsaja_comm %>%
  filter(name %in% dong.sf_commune_filtered$name)
jongsaja_comm_filtered
:)   # A tibble: 15 × 15
:)      name            MF    WR    TS    AF    IC    FI    RE    PT    BF    PA    EC    HS    RS    PS
:)      <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
:)    1 Cheongdam     6649 35218  2281 16725 12338  5342  7477 25307 16321  3234  5466  8937  1994  8601
:)    2 Daechi        3914 23571  2861  9480  9407  9381  6669 15237 12570   469 10135  7578  1870  3491
:)    3 Guro         43635 62403  8998 17013 69286  9807  5548 43186 42657  2606  8855 12402  2615  7759
:)    4 Gwanghui     26506 53669  2081 14591  5709  4721  3887 13237 13046  4757  6032  6330  1890  3159
:)    5 Jamsil        2001 29720  3247 14677 18254  6394  4846  8715  6821  3109  7341  8126  4844  4935
:)    6 Jongno        6187 21876  2723 16509 11779 12616  3519 12487  8122  7014  9110  9960  3053  4085
:)    7 Munjeong      4476 23459  9481  9168  7517  1498  5810 18509  7795  3538  5592  8451  1653  3525
:)    8 Myeongdong    1966 40733 11292 19350 18958 46006  5390 22197 23731 15481  2211  3697  1633  4468
:)    9 Samseong      1834 34794  1203  8693 12460  4715  3896 20386 20911   902  4496  2495  1933  2270
:)   10 Seocho        1709 15680  1819  6872 12069  2932  3261 21865 15767  6862  5152  3005  2832  3439
:)   11 Seongsu      24868 30278  6366 12132 11536  3147  5518 15441 13740  3801 11379  8589  2000  6930
:)   12 Sinchon       3176 23458  5568 26346 11100  4449  3111 13052 16398  2507 18571 15650  3183  6257
:)   13 Yeoksam       2321 38715  1799 19338 22309 20714 10619 46336 20988  2375  9748 10761  2240  6043
:)   14 Yeongdeungpo 10281 30375  6301 11452 12847 10315  3343 11238 38711  3229  3876  6262  1842  4761
:)   15 Yeoui          656 13448  1782 10065 15955 47774  4229 22939 18250  5196  1775  3860  1363  4283
jongsaja_comm_filtered_acomp <- jongsaja_comm_filtered %>%
  column_to_rownames('name') %>%
  as.matrix() %>%
  as.table() %>% 
  compositions::acomp() %>%
    as.matrix() %>%
    as.data.frame()
jongsaja_comm_filtered_acomp
:)                 MF    WR     TS     AF     IC     FI     RE     PT     BF      PA     EC     HS      RS     PS
:)   Cheongdam 0.0427 0.226 0.0146 0.1073 0.0791 0.0343 0.0480 0.1623 0.1047 0.02075 0.0351 0.0573 0.01279 0.0552
:)   Daechi    0.0336 0.202 0.0245 0.0813 0.0807 0.0804 0.0572 0.1306 0.1078 0.00402 0.0869 0.0650 0.01603 0.0299
:)   Guro      0.1296 0.185 0.0267 0.0505 0.2057 0.0291 0.0165 0.1282 0.1267 0.00774 0.0263 0.0368 0.00776 0.0230
:)   Gwanghui  0.1661 0.336 0.0130 0.0914 0.0358 0.0296 0.0244 0.0829 0.0817 0.02980 0.0378 0.0397 0.01184 0.0198
:)   Jamsil    0.0163 0.242 0.0264 0.1193 0.1484 0.0520 0.0394 0.0708 0.0554 0.02527 0.0597 0.0660 0.03937 0.0401
:)    [ reached 'max' / getOption("max.print") -- omitted 10 rows ]

4.2.4.2 CLR transformation

jongsaja_comm_filtered_acomp_clr <- easyCODA::CLR(jongsaja_comm_filtered_acomp)$LR
jongsaja_comm_filtered_acomp_clr
:)                    MF    WR     TS      AF      IC     FI     RE     PT      BF      PA      EC      HS     RS     PS
:)   Cheongdam    -0.685 0.982 -1.755  0.2375 -0.0668 -0.904 -0.568  0.652  0.2130 -1.4057 -0.8809 -0.3892 -1.889 -0.428
:)   Daechi       -0.916 0.879 -1.229 -0.0315 -0.0392 -0.042 -0.383  0.443  0.2506 -3.0378  0.0353 -0.2554 -1.655 -1.030
:)   Guro          0.525 0.883 -1.054 -0.4168  0.9875 -0.968 -1.537  0.515  0.5024 -2.2930 -1.0698 -0.7329 -2.290 -1.202
:)   Gwanghui      0.807 1.512 -1.738  0.2099 -0.7285 -0.919 -1.113  0.112  0.0979 -0.9109 -0.6735 -0.6252 -1.834 -1.320
:)   Jamsil       -1.592 1.106 -1.108  0.4005  0.6186 -0.430 -0.708 -0.121 -0.3658 -1.1515 -0.2923 -0.1907 -0.708 -0.689
:)    [ getOption("max.print") 에 도달했습니다 -- 10 행들을 생략합니다 ]
4.2.4.2.1 row weight

it is better to choose density_emp(than total_emp) as the row weight for PCA

row_weight <- dong.sf_commune_filtered %>%
  st_drop_geometry() %>%
#  select(name, total_emp) %>%     # name in alphametic order
#  select(total_emp) %>%
#  pull(total_emp) 
  
  select(name, density_emp) %>%     # name in alphametic order
  select(density_emp) %>%
  pull(density_emp)   
  
#  select(name, total_inflow) %>%     # name in alphametic order
#  select(total_inflow) %>%
#  pull(total_inflow)   
  
row_weight <- row_weight / sum(row_weight)
#row_weight <- rep(1/length(dong.sf_commune_filtered$name), length(dong.sf_commune_filtered$name))
row_weight
:)    [1] 0.0580 0.0319 0.0833 0.0654 0.0279 0.0558 0.0270 0.2120 0.0878 0.0647 0.0311 0.0287 0.1315 0.0455 0.0493

4.2.4.3 PCA

LR_wt <- easyCODA::CLR(jongsaja_comm_filtered_acomp)$LR.wt  # different weight among industries
#LR_wt <- rep(1/14, 14)
LR_wt
:)       MF     WR     TS     AF     IC     FI     RE     PT     BF     PA     EC     HS     RS     PS 
:)   0.0520 0.2009 0.0287 0.0913 0.0970 0.0762 0.0339 0.1298 0.1139 0.0284 0.0494 0.0506 0.0162 0.0318
jongsaja_comm_filtered_acomp_clr_pca <- easyCODA::PCA(jongsaja_comm_filtered_acomp_clr,
                                             nd = 2,
                                             weight = LR_wt,
                                             #weight = T,
                                             row.wt = row_weight   
                                             )
jongsaja_comm_filtered_acomp_clr_pca
:)   
:)    Principal inertias (eigenvalues):
:)              1        2        3        4        5        6        7        8       9        10       11       12       13       14
:)   Value      0.127232 0.046676 0.029799 0.017891 0.015934 0.008596 0.007111 0.00477 0.002099 0.001318 0.000441 0.000138 0.000008 0 
:)   Percentage 48.56%   17.81%   11.37%   6.83%    6.08%    3.28%    2.71%    1.82%   0.8%     0.5%     0.17%    0.05%    0%       0%
:)   
:)   
:)    Rows:
:)           Cheongdam Daechi   Guro Gwanghui  Jamsil Jongno Munjeong Myeongdong Samseong Seocho Seongsu Sinchon Yeoksam Yeongdeungpo  Yeoui
:)   Mass        0.000  0.000  0.000    0.000  0.0000  0.000   0.0000      0.000    0.000  0.000   0.000   0.000   0.000        0.000  0.000
:)   ChiDist     0.327  0.427  0.589    0.671  0.4746  0.431   0.6054      0.547    0.425  0.434   0.622   0.565   0.382        0.437  0.705
:)   Inertia     0.000  0.000  0.000    0.000  0.0000  0.000   0.0000      0.000    0.000  0.000   0.000   0.000   0.000        0.000  0.000
:)   Dim. 1     -0.710 -0.532 -1.101   -1.371 -0.2896 -0.276  -1.1821      1.398   -0.268 -0.215  -1.576  -0.803   0.272       -0.356  1.792
:)   Dim. 2     -0.259 -0.773  0.290    1.522 -0.0418  0.558   0.0915      0.986   -1.098 -0.855   0.863  -0.333  -1.562        1.175 -0.793
:)   
:)   
:)    Columns:
:)                MF      WR     TS      AF      IC     FI       RE      PT      BF     PA      EC      HS       RS       PS
:)   Mass     0.0520  0.2009 0.0287 0.09128  0.0970 0.0762  0.03391  0.1298  0.1139 0.0284  0.0494  0.0506  0.01616  0.03179
:)   ChiDist  1.0526  0.2557 0.7084 0.24421  0.3686 0.9087  0.32285  0.3451  0.3321 0.9125  0.6748  0.4980  0.41360  0.26995
:)   Inertia  0.0576  0.0131 0.0144 0.00544  0.0132 0.0630  0.00353  0.0155  0.0126 0.0237  0.0225  0.0125  0.00276  0.00232
:)   Dim. 1  -2.5274 -0.2748 0.4874 0.15428  0.2157 2.3958  0.06432  0.1477  0.1914 1.3673 -1.4104 -0.8778 -0.44173 -0.18556
:)   Dim. 2   2.1164  0.3799 2.3748 0.20476 -0.5843 0.5163 -0.92507 -1.3161 -0.1290 2.3666 -1.3195 -0.4882 -0.53571 -0.23864
4.2.4.3.1 some stats
a = sum(jongsaja_comm_filtered_acomp_clr_pca$colinertia[1:3])
b = sum(jongsaja_comm_filtered_acomp_clr_pca$colinertia)
a / b
:)   [1] 0.325
48.56 + 17.81
:)   [1] 66.4
48.56 + 17.81 + 11.37
:)   [1] 77.7
a = sum(jongsaja_comm_filtered_acomp_clr_pca$colinertia[1:3])
b = sum(jongsaja_comm_filtered_acomp_clr_pca$colinertia)
a / b
:)   [1] 0.325
4.2.4.3.2 biplot

tiff("output/biplot.tiff", units="in", width=11, height=8, res=300)

easyCODA::PLOT.PCA(jongsaja_comm_filtered_acomp_clr_pca,
                   map = "contribution",
                   dim = c(1, 2),
                   colarrows = "pink",
                   cex = c(0.8, 1.2),
                   main = "",
                   rescale = 1,
                   fonts = c(1, 1))                   # the default 가로 세로 비율

dev.off()
:)   png 
:)     2

4.2.5 clustering

4.2.5.1 PCs

b <- dong.sf_commune_filtered %>%
  distinct(name) %>%
  pull(name)
b 
:)    [1] "Cheongdam"    "Daechi"       "Guro"         "Gwanghui"     "Jamsil"       "Jongno"       "Munjeong"     "Myeongdong"   "Samseong"     "Seocho"       "Seongsu"      "Sinchon"      "Yeoksam"      "Yeongdeungpo" "Yeoui"

4.2.5.2 coordinates

pca_coords <- jongsaja_comm_filtered_acomp_clr_pca$rowcoord
pca_coords <- pca_coords %>%
  as.data.frame() 
  
rownames(pca_coords) <- b
pca_coords <- pca_coords %>%
  select(1:3)
pca_coords
:)                    V1      V2       V3
:)   Cheongdam    -0.710 -0.2592  0.36534
:)   Daechi       -0.532 -0.7735  0.53383
:)   Guro         -1.101  0.2903 -1.96932
:)   Gwanghui     -1.371  1.5221  0.43328
:)   Jamsil       -0.290 -0.0418  1.64406
:)   Jongno       -0.276  0.5582  1.96510
:)   Munjeong     -1.182  0.0915  0.88923
:)   Myeongdong    1.398  0.9863 -0.15182
:)   Samseong     -0.268 -1.0984 -1.30969
:)   Seocho       -0.215 -0.8552 -0.26724
:)   Seongsu      -1.576  0.8631  0.47667
:)   Sinchon      -0.803 -0.3334  1.86651
:)   Yeoksam       0.272 -1.5625  0.44257
:)   Yeongdeungpo -0.356  1.1747 -0.96834
:)   Yeoui         1.792 -0.7926 -0.00307

4.2.5.3 WARD

4.2.5.3.1 equal weight among PCs
4.2.5.3.2 different weight by each PC’s importance
eigen <- c(0.127232, 0.046676, 0.029799)  

pca_coords <- pca_coords %>%
  mutate(V1 = V1 * eigen[1], 
         V2 = V2 * eigen[2],
         V3 = V3 * eigen[3])
pca_coords
:)                     V1       V2         V3
:)   Cheongdam    -0.0903 -0.01210  0.0108866
:)   Daechi       -0.0677 -0.03610  0.0159077
:)   Guro         -0.1401  0.01355 -0.0586839
:)   Gwanghui     -0.1744  0.07105  0.0129113
:)   Jamsil       -0.0368 -0.00195  0.0489915
:)   Jongno       -0.0351  0.02605  0.0585581
:)   Munjeong     -0.1504  0.00427  0.0264981
:)   Myeongdong    0.1779  0.04604 -0.0045241
:)   Samseong     -0.0341 -0.05127 -0.0390274
:)   Seocho       -0.0273 -0.03992 -0.0079636
:)   Seongsu      -0.2005  0.04028  0.0142042
:)   Sinchon      -0.1022 -0.01556  0.0556202
:)   Yeoksam       0.0346 -0.07293  0.0131881
:)   Yeongdeungpo -0.0453  0.05483 -0.0288555
:)   Yeoui         0.2280 -0.03700 -0.0000915
jongsaja_comm_filtered_acomp_clr_ward <- easyCODA::WARD(pca_coords,
                                               weight = F,
                                               row.wt = row_weight
                                               #row.wt = rep(1/15, 15)
                                               )
jongsaja_comm_filtered_acomp_clr_ward
:)   Number of objects: 15

4.2.5.4 choose the number of clusters

jongsaja_comm_filtered_acomp_mat <- jongsaja_comm_filtered_acomp %>% 
    as.matrix() %>%
    as.data.frame()
jongsaja_comm_filtered_acomp_mat
:)                 MF    WR     TS     AF     IC     FI     RE     PT     BF      PA     EC     HS      RS     PS
:)   Cheongdam 0.0427 0.226 0.0146 0.1073 0.0791 0.0343 0.0480 0.1623 0.1047 0.02075 0.0351 0.0573 0.01279 0.0552
:)   Daechi    0.0336 0.202 0.0245 0.0813 0.0807 0.0804 0.0572 0.1306 0.1078 0.00402 0.0869 0.0650 0.01603 0.0299
:)   Guro      0.1296 0.185 0.0267 0.0505 0.2057 0.0291 0.0165 0.1282 0.1267 0.00774 0.0263 0.0368 0.00776 0.0230
:)   Gwanghui  0.1661 0.336 0.0130 0.0914 0.0358 0.0296 0.0244 0.0829 0.0817 0.02980 0.0378 0.0397 0.01184 0.0198
:)   Jamsil    0.0163 0.242 0.0264 0.1193 0.1484 0.0520 0.0394 0.0708 0.0554 0.02527 0.0597 0.0660 0.03937 0.0401
:)    [ reached 'max' / getOption("max.print") -- omitted 10 rows ]

4.2.6 emp_mode

4.2.6.1 define cluster

emp_mode <- as.data.frame(as.matrix(cutree(jongsaja_comm_filtered_acomp_clr_ward, k = 4))) %>%
    rownames_to_column() %>%
    as_tibble() %>%
    bind_cols(rownames(jongsaja_comm_filtered_acomp_clr)) %>%
    select(-rowname)
colnames(emp_mode) <- c("cluster", "name") 
emp_mode
:)   # A tibble: 15 × 2
:)      cluster name        
:)        <int> <chr>       
:)    1       1 Cheongdam   
:)    2       1 Daechi      
:)    3       2 Guro        
:)    4       2 Gwanghui    
:)    5       1 Jamsil      
:)    6       1 Jongno      
:)    7       2 Munjeong    
:)    8       3 Myeongdong  
:)    9       4 Samseong    
:)   10       4 Seocho      
:)   11       2 Seongsu     
:)   12       1 Sinchon     
:)   13       4 Yeoksam     
:)   14       1 Yeongdeungpo
:)   15       3 Yeoui

4.2.6.2 share of KBI and CSI

SSS(skilled scalable services) (Eckert et al., 2020)

KBI_CSI <- jongsaja_comm_filtered_acomp %>%
    rownames_to_column("name") %>%
    as_tibble() %>%
    mutate(share_of_KBI = (`IC` + `FI` + `PT`) * 100,
           share_of_CSI = (`WR` + `AF` + `PS`) * 100) %>%
    select(name, share_of_KBI, share_of_CSI) %>%
    arrange(desc(share_of_KBI))
KBI_CSI
:)   # A tibble: 15 × 3
:)      name         share_of_KBI share_of_CSI
:)      <chr>               <dbl>        <dbl>
:)    1 Yeoui                57.2         18.3
:)    2 Yeoksam              41.7         29.9
:)    3 Myeongdong           40.1         29.7
:)    4 Guro                 36.3         25.9
:)    5 Seocho               35.7         25.2
:)    6 Samseong             31.0         37.8
:)    7 Daechi               29.2         31.3
:)    8 Jongno               28.6         32.9
:)    9 Cheongdam            27.6         38.8
:)   10 Jamsil               27.1         40.1
:)   11 Munjeong             24.9         32.7
:)   12 Yeongdeungpo         22.2         30.1
:)   13 Seongsu              19.3         31.7
:)   14 Sinchon              18.7         36.7
:)   15 Gwanghui             14.8         44.7
emp_mode <- emp_mode %>%
    left_join(KBI_CSI, by = "name") 
emp_mode
:)   # A tibble: 15 × 4
:)      cluster name         share_of_KBI share_of_CSI
:)        <int> <chr>               <dbl>        <dbl>
:)    1       1 Cheongdam            27.6         38.8
:)    2       1 Daechi               29.2         31.3
:)    3       2 Guro                 36.3         25.9
:)    4       2 Gwanghui             14.8         44.7
:)    5       1 Jamsil               27.1         40.1
:)    6       1 Jongno               28.6         32.9
:)    7       2 Munjeong             24.9         32.7
:)    8       3 Myeongdong           40.1         29.7
:)    9       4 Samseong             31.0         37.8
:)   10       4 Seocho               35.7         25.2
:)   11       2 Seongsu              19.3         31.7
:)   12       1 Sinchon              18.7         36.7
:)   13       4 Yeoksam              41.7         29.9
:)   14       1 Yeongdeungpo         22.2         30.1
:)   15       3 Yeoui                57.2         18.3

4.2.6.3 sorting and naming

emp_mode <- emp_mode %>%
    mutate(cluster = as.character(cluster)) %>%
    mutate(cluster = if_else(cluster == "1", "Mixed", cluster),
           cluster = if_else(cluster == "2", "Manufacturing", cluster),
           cluster = if_else(cluster == "3", "Financial", cluster),
           cluster = if_else(cluster == "4", "Professional", cluster)) %>%
    mutate(cluster = factor(cluster, levels = c("Manufacturing", "Mixed", "Professional", "Financial")))
emp_mode
:)   # A tibble: 15 × 4
:)      cluster       name         share_of_KBI share_of_CSI
:)      <fct>         <chr>               <dbl>        <dbl>
:)    1 Mixed         Cheongdam            27.6         38.8
:)    2 Mixed         Daechi               29.2         31.3
:)    3 Manufacturing Guro                 36.3         25.9
:)    4 Manufacturing Gwanghui             14.8         44.7
:)    5 Mixed         Jamsil               27.1         40.1
:)    6 Mixed         Jongno               28.6         32.9
:)    7 Manufacturing Munjeong             24.9         32.7
:)    8 Financial     Myeongdong           40.1         29.7
:)    9 Professional  Samseong             31.0         37.8
:)   10 Professional  Seocho               35.7         25.2
:)   11 Manufacturing Seongsu              19.3         31.7
:)   12 Mixed         Sinchon              18.7         36.7
:)   13 Professional  Yeoksam              41.7         29.9
:)   14 Mixed         Yeongdeungpo         22.2         30.1
:)   15 Financial     Yeoui                57.2         18.3

4.2.6.4 dong.sf_commune_filtered

dong.sf_commune_filtered <- dong.sf_commune_filtered %>%
    left_join(emp_mode, by = "name") 
dong.sf_commune_filtered
:)   Simple feature collection with 15 features and 10 fields
:)   Geometry type: GEOMETRY
:)   Dimension:     XY
:)   Bounding box:  xmin: 944000 ymin: 1940000 xmax: 970000 ymax: 1960000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 15 × 11
:)      name                                                                                                     geometry      area total_emp density_emp log_total_emp log_density_emp total_inflow cluster       share_of_KBI share_of_CSI
:)      <chr>                                                                                              <GEOMETRY [m]>     [m^2]     <dbl>       <dbl>         <dbl>           <dbl>        <dbl> <fct>                <dbl>        <dbl>
:)    1 Cheongdam    POLYGON ((960137 1945124, 960129 1945145, 960128 1945146, 960111 1945193, 960109 1945199, 960100 ...  7369461.    155890       212.           12.0            5.35     1519280. Mixed                 27.6         38.8
:)    2 Daechi       POLYGON ((961579 1941234, 961557 1941223, 961545 1941217, 961524 1941214, 961492 1941208, 961476 ... 10038396.    116633       116.           11.7            4.76     1217771. Mixed                 29.2         31.3
:)    3 Guro         POLYGON ((946589 1942280, 946594 1942258, 946602 1942228, 946607 1942205, 946608 1942204, 946611 ... 11092136.    336770       304.           12.7            5.72     2382391. Manufacturing         36.3         25.9
:)    4 Gwanghui     POLYGON ((957266 1950489, 957262 1950476, 957249 1950474, 957245 1950468, 957219 1950435, 957203 ...  6689535.    159615       239.           12.0            5.47     1212202. Manufacturing         14.8         44.7
:)    5 Jamsil       POLYGON ((966155 1945440, 966223 1945405, 966274 1945378, 966281 1945375, 966285 1945373, 966285 ... 12092064.    123030       102.           11.7            4.62     1016366. Mixed                 27.1         40.1
:)    6 Jongno       POLYGON ((955802 1953069, 955799 1953064, 955798 1953061, 955797 1953059, 955795 1953054, 955794 ...  6346426.    129040       203.           11.8            5.31     1775598. Mixed                 28.6         32.9
:)    7 Munjeong     POLYGON ((970042 1944496, 970040 1944493, 970036 1944485, 970036 1944484, 970036 1944481, 970036 ... 11209772.    110472        98.5          11.6            4.59     1019500. Manufacturing         24.9         32.7
:)    8 Myeongdong   POLYGON ((954497 1950831, 954497 1950820, 954502 1950815, 954506 1950812, 954515 1950804, 954511 ...  2809133.    217113       773.           12.3            6.65     2325144. Financial             40.1         29.7
:)    9 Samseong     POLYGON ((962184 1944598, 962175 1944598, 962166 1944598, 962183 1944573, 962189 1944556, 962190 ...  3779139.    120988       320.           11.7            5.77     1198605. Professional          31.0         37.8
:)   10 Seocho       POLYGON ((956921 1944435, 956929 1944385, 956930 1944376, 956935 1944348, 956941 1944319, 956948 ...  4377955.    103264       236.           11.5            5.46      988172. Professional          35.7         25.2
:)   11 Seongsu      POLYGON ((961365 1949039, 961352 1949009, 961351 1949007, 961348 1948999, 961341 1948985, 961338 ... 13747639.    155725       113.           12.0            4.73     1363683. Manufacturing         19.3         31.7
:)   12 Sinchon      POLYGON ((948402 1949282, 948351 1949283, 947902 1949285, 947774 1949286, 947725 1949286, 947714 ... 14590732.    152826       105.           11.9            4.65     1278512. Mixed                 18.7         36.7
:)   13 Yeoksam      POLYGON ((958823 1942849, 958802 1942857, 958771 1942870, 958771 1942868, 958777 1942838, 958776 ...  4469436.    214306       479.           12.3            6.17     2447083. Professional          41.7         29.9
:)   14 Yeongdeungpo GEOMETRYCOLLECTION (POLYGON ((947987 1947559, 948020 1947495, 948038 1947459, 948052 1947433, 948...  9328081.    154833       166.           12.0            5.11     1296826. Mixed                 22.2         30.1
:)   15 Yeoui        MULTIPOLYGON (((949183 1949093, 949187 1949091, 949190 1949094, 949206 1949086, 949211 1949085, 9...  8431326.    151575       180.           11.9            5.19     1738367. Financial             57.2         18.3
dong.sf_commune_filtered %>%
    st_drop_geometry() %>%
    group_by(cluster) %>%
    summarise(mean_density_emp = weighted.mean(density_emp, total_emp),
              mean_share_of_KBI = weighted.mean(share_of_KBI, total_emp),
              mean_share_of_CSI = weighted.mean(share_of_CSI, total_emp))
:)   # A tibble: 4 × 4
:)     cluster       mean_density_emp mean_share_of_KBI mean_share_of_CSI
:)     <fct>                    <dbl>             <dbl>             <dbl>
:)   1 Manufacturing             221.              26.7              32.0
:)   2 Mixed                     153.              25.3              35.0
:)   3 Professional              378.              37.3              31.0
:)   4 Financial                 529.              47.1              25.0

4.2.7 mapping

4.2.7.1 figure 1b

4.2.7.2 figure 3b

4.3 radar plot

#remotes::install_github("ricardo-bion/ggradar")
library(ggradar)

4.3.1 compositional data

jongsaja_comm_filtered_acomp <- jongsaja_comm_filtered %>%
  column_to_rownames('name') %>%
  as.matrix() %>%
  as.table() %>% 
  compositions::acomp() %>%
    as.matrix() %>%
    as.data.frame()
jongsaja_comm_filtered_acomp
:)                 MF    WR     TS     AF     IC     FI     RE     PT     BF      PA     EC     HS      RS     PS
:)   Cheongdam 0.0427 0.226 0.0146 0.1073 0.0791 0.0343 0.0480 0.1623 0.1047 0.02075 0.0351 0.0573 0.01279 0.0552
:)   Daechi    0.0336 0.202 0.0245 0.0813 0.0807 0.0804 0.0572 0.1306 0.1078 0.00402 0.0869 0.0650 0.01603 0.0299
:)   Guro      0.1296 0.185 0.0267 0.0505 0.2057 0.0291 0.0165 0.1282 0.1267 0.00774 0.0263 0.0368 0.00776 0.0230
:)   Gwanghui  0.1661 0.336 0.0130 0.0914 0.0358 0.0296 0.0244 0.0829 0.0817 0.02980 0.0378 0.0397 0.01184 0.0198
:)   Jamsil    0.0163 0.242 0.0264 0.1193 0.1484 0.0520 0.0394 0.0708 0.0554 0.02527 0.0597 0.0660 0.03937 0.0401
:)    [ reached 'max' / getOption("max.print") -- omitted 10 rows ]
jongsaja_comm_filtered_acomp <- jongsaja_comm_filtered_acomp * 100
jongsaja_comm_filtered_acomp
:)                MF   WR   TS    AF    IC   FI   RE    PT    BF    PA   EC   HS    RS   PS
:)   Cheongdam  4.27 22.6 1.46 10.73  7.91 3.43 4.80 16.23 10.47 2.075 3.51 5.73 1.279 5.52
:)   Daechi     3.36 20.2 2.45  8.13  8.07 8.04 5.72 13.06 10.78 0.402 8.69 6.50 1.603 2.99
:)   Guro      12.96 18.5 2.67  5.05 20.57 2.91 1.65 12.82 12.67 0.774 2.63 3.68 0.776 2.30
:)   Gwanghui  16.61 33.6 1.30  9.14  3.58 2.96 2.44  8.29  8.17 2.980 3.78 3.97 1.184 1.98
:)   Jamsil     1.63 24.2 2.64 11.93 14.84 5.20 3.94  7.08  5.54 2.527 5.97 6.60 3.937 4.01
:)    [ reached 'max' / getOption("max.print") -- omitted 10 rows ]
a <- emp_mode %>%
  select(cluster, name)
a
:)   # A tibble: 15 × 2
:)      cluster       name        
:)      <fct>         <chr>       
:)    1 Mixed         Cheongdam   
:)    2 Mixed         Daechi      
:)    3 Manufacturing Guro        
:)    4 Manufacturing Gwanghui    
:)    5 Mixed         Jamsil      
:)    6 Mixed         Jongno      
:)    7 Manufacturing Munjeong    
:)    8 Financial     Myeongdong  
:)    9 Professional  Samseong    
:)   10 Professional  Seocho      
:)   11 Manufacturing Seongsu     
:)   12 Mixed         Sinchon     
:)   13 Professional  Yeoksam     
:)   14 Mixed         Yeongdeungpo
:)   15 Financial     Yeoui
jongsaja_comm_filtered_acomp_cluster <- jongsaja_comm_filtered_acomp %>%
    bind_cols(a) %>%
    select(-name) %>%
  relocate(EC, PT, IC, PA, FI, RS, RE, HS, WR, MF, BF, PS, TS, AF) %>%
  rownames_to_column('name') %>%
  relocate(name, cluster) 
jongsaja_comm_filtered_acomp_cluster
:)          name       cluster   EC    PT    IC    PA   FI    RS   RE   HS   WR    MF    BF   PS   TS    AF
:)   1 Cheongdam         Mixed 3.51 16.23  7.91 2.075 3.43 1.279 4.80 5.73 22.6  4.27 10.47 5.52 1.46 10.73
:)   2    Daechi         Mixed 8.69 13.06  8.07 0.402 8.04 1.603 5.72 6.50 20.2  3.36 10.78 2.99 2.45  8.13
:)   3      Guro Manufacturing 2.63 12.82 20.57 0.774 2.91 0.776 1.65 3.68 18.5 12.96 12.67 2.30 2.67  5.05
:)   4  Gwanghui Manufacturing 3.78  8.29  3.58 2.980 2.96 1.184 2.44 3.97 33.6 16.61  8.17 1.98 1.30  9.14
:)    [ reached 'max' / getOption("max.print") -- omitted 11 rows ]
rm(a)
for (i in 3:15) {
  min <- min(jongsaja_comm_filtered_acomp_cluster[i])
  print(min)
}
:)   [1] 1.02
:)   [1] 7.08
:)   [1] 3.58
:)   [1] 0.402
:)   [1] 1.36
:)   [1] 0.752
:)   [1] 1.65
:)   [1] 1.7
:)   [1] 8.87
:)   [1] 0.433
:)   [1] 5.54
:)   [1] 1.88
:)   [1] 0.839
for (i in 3:15) {
  max <- max(jongsaja_comm_filtered_acomp_cluster[i])
  print(max)
}
:)   [1] 12.2
:)   [1] 21.6
:)   [1] 20.6
:)   [1] 7.13
:)   [1] 31.5
:)   [1] 3.94
:)   [1] 5.72
:)   [1] 10.2
:)   [1] 33.6
:)   [1] 16.6
:)   [1] 25
:)   [1] 5.52
:)   [1] 8.58

4.3.2 visualization

4.3.2.1 15 centers

entire <- jongsaja_comm_filtered_acomp_cluster %>%
  summarise(across(is.numeric, mean)) %>%
  rownames_to_column('name')

entire$name <- "average"

entire 
:)        name   EC PT  IC   PA   FI   RS   RE   HS   WR  MF   BF   PS   TS   AF
:)   1 average 4.94 13 9.7 2.84 7.62 1.62 3.39 5.06 20.1 5.2 11.4 3.18 2.87 9.13

4.3.2.2 Manufacturing

a <- jongsaja_comm_filtered_acomp_cluster %>%
  filter(cluster == "Manufacturing") %>%
  select(-cluster)
a
:)         name   EC    PT    IC    PA   FI    RS   RE   HS   WR    MF    BF   PS   TS   AF
:)   1     Guro 2.63 12.82 20.57 0.774 2.91 0.776 1.65 3.68 18.5 12.96 12.67 2.30 2.67 5.05
:)   2 Gwanghui 3.78  8.29  3.58 2.980 2.96 1.184 2.44 3.97 33.6 16.61  8.17 1.98 1.30 9.14
:)   3 Munjeong 5.06 16.75  6.80 3.203 1.36 1.496 5.26 7.65 21.2  4.05  7.06 3.19 8.58 8.30
:)   4  Seongsu 7.31  9.92  7.41 2.441 2.02 1.284 3.54 5.52 19.4 15.97  8.82 4.45 4.09 7.79
b <- a %>%
  summarise_at(2:15, mean) %>%
  rownames_to_column('name')
b$name <- "mean"
b
:)     name   EC   PT   IC   PA   FI   RS   RE  HS   WR   MF   BF   PS   TS   AF
:)   1 mean 4.69 11.9 9.59 2.35 2.31 1.19 3.22 5.2 23.2 12.4 9.18 2.98 4.16 7.57
b <- b %>%
  bind_rows(entire)
b
:)        name   EC   PT   IC   PA   FI   RS   RE   HS   WR   MF    BF   PS   TS   AF
:)   1    mean 4.69 11.9 9.59 2.35 2.31 1.19 3.22 5.20 23.2 12.4  9.18 2.98 4.16 7.57
:)   2 average 4.94 13.0 9.70 2.84 7.62 1.62 3.39 5.06 20.1  5.2 11.39 3.18 2.87 9.13

4.3.2.3 Mixed

a <- jongsaja_comm_filtered_acomp_cluster %>%
  filter(cluster == "Mixed") %>%
  select(-cluster)
a
:)          name    EC    PT    IC    PA   FI   RS   RE    HS   WR   MF    BF   PS   TS    AF
:)   1 Cheongdam  3.51 16.23  7.91 2.075 3.43 1.28 4.80  5.73 22.6 4.27 10.47 5.52 1.46 10.73
:)   2    Daechi  8.69 13.06  8.07 0.402 8.04 1.60 5.72  6.50 20.2 3.36 10.78 2.99 2.45  8.13
:)   3    Jamsil  5.97  7.08 14.84 2.527 5.20 3.94 3.94  6.60 24.2 1.63  5.54 4.01 2.64 11.93
:)   4    Jongno  7.06  9.68  9.13 5.436 9.78 2.37 2.73  7.72 17.0 4.79  6.29 3.17 2.11 12.79
:)   5   Sinchon 12.15  8.54  7.26 1.640 2.91 2.08 2.04 10.24 15.3 2.08 10.73 4.09 3.64 17.24
:)    [ reached 'max' / getOption("max.print") -- omitted 1 rows ]
b <- a %>%
  summarise_at(2:15, mean) %>%
  rownames_to_column('name')
b$name <- "mean"
b
:)     name   EC   PT   IC   PA FI   RS   RE   HS   WR   MF   BF   PS   TS   AF
:)   1 mean 6.65 10.3 9.25 2.36  6 2.08 3.56 6.81 19.8 3.79 11.5 3.81 2.73 11.4
b <- b %>%
  bind_rows(entire)
b
:)        name   EC   PT   IC   PA   FI   RS   RE   HS   WR   MF   BF   PS   TS    AF
:)   1    mean 6.65 10.3 9.25 2.36 6.00 2.08 3.56 6.81 19.8 3.79 11.5 3.81 2.73 11.37
:)   2 average 4.94 13.0 9.70 2.84 7.62 1.62 3.39 5.06 20.1 5.20 11.4 3.18 2.87  9.13

4.3.2.4 Professional

a <- jongsaja_comm_filtered_acomp_cluster %>%
  filter(cluster == "Professional") %>%
  select(-cluster)
a
:)         name   EC   PT   IC    PA   FI   RS   RE   HS   WR   MF    BF   PS    TS   AF
:)   1 Samseong 3.72 16.8 10.3 0.746 3.90 1.60 3.22 2.06 28.8 1.52 17.28 1.88 0.994 7.19
:)   2   Seocho 4.99 21.2 11.7 6.645 2.84 2.74 3.16 2.91 15.2 1.65 15.27 3.33 1.762 6.65
:)   3  Yeoksam 4.55 21.6 10.4 1.108 9.67 1.05 4.96 5.02 18.1 1.08  9.79 2.82 0.839 9.02
b <- a %>%
  summarise_at(2:15, mean) %>%
  rownames_to_column('name')
b$name <- "mean"
b
:)     name   EC   PT   IC   PA   FI  RS   RE   HS   WR   MF   BF   PS  TS   AF
:)   1 mean 4.42 19.9 10.8 2.83 5.47 1.8 3.78 3.33 20.7 1.42 14.1 2.68 1.2 7.62
b <- b %>%
  bind_rows(entire)
b
:)        name   EC   PT   IC   PA   FI   RS   RE   HS   WR   MF   BF   PS   TS   AF
:)   1    mean 4.42 19.9 10.8 2.83 5.47 1.80 3.78 3.33 20.7 1.42 14.1 2.68 1.20 7.62
:)   2 average 4.94 13.0  9.7 2.84 7.62 1.62 3.39 5.06 20.1 5.20 11.4 3.18 2.87 9.13

4.3.2.5 Financial

a <- jongsaja_comm_filtered_acomp_cluster %>%
  filter(cluster == "Financial") %>%
  select(-cluster)
a
:)           name   EC   PT    IC   PA   FI    RS   RE   HS    WR    MF   BF   PS   TS   AF
:)   1 Myeongdong 1.02 10.2  8.73 7.13 21.2 0.752 2.48 1.70 18.76 0.906 10.9 2.06 5.20 8.91
:)   2      Yeoui 1.17 15.1 10.53 3.43 31.5 0.899 2.79 2.55  8.87 0.433 12.0 2.83 1.18 6.64
b <- a %>%
  summarise_at(2:15, mean) %>%
  rownames_to_column('name')
b$name <- "mean"
b
:)     name   EC   PT   IC   PA   FI    RS   RE   HS   WR    MF   BF   PS   TS   AF
:)   1 mean 1.09 12.7 9.63 5.28 26.4 0.826 2.64 2.12 13.8 0.669 11.5 2.44 3.19 7.78
b <- b %>%
  bind_rows(entire)
b
:)        name   EC   PT   IC   PA    FI    RS   RE   HS   WR    MF   BF   PS   TS   AF
:)   1    mean 1.09 12.7 9.63 5.28 26.35 0.826 2.64 2.12 13.8 0.669 11.5 2.44 3.19 7.78
:)   2 average 4.94 13.0 9.70 2.84  7.62 1.616 3.39 5.06 20.1 5.196 11.4 3.18 2.87 9.13

4.3.2.6 output

r1234 %>%
  ggpubr::ggexport(filename = 'output/fig_3b.png', width = 1000, height = 850, res = 100)
b %>%
  mutate(name = ifelse(name != "mean", "average share across all 15 centers", "average share within each cluster")) %>%
  ggradar(grid.min = 10, grid.mid = 20, grid.max = 30,
        gridline.min.colour = "grey", gridline.mid.colour = "grey", gridline.max.colour = "grey",
        grid.line.width = 0.8, grid.label.size = 8,
        centre.y = 0.4,
        values.radar = c("10%", "20%", "30%"),
        group.colours = c("orange", "blue"),
        group.point.size = 1.8, group.line.width = 0.8,
        fill = TRUE, fill.alpha = 0.10, 
        plot.title = "Financial",
        #background.circle.colour = "white",
        base.size = 13,
        axis.line.colour = "grey90",
        label.centre.y = F,
        plot.legend = T)  

4.4 clean your RAM

rm(b)
rm(p1, p2, p3, p4)
rm(r1, r2, r3, r4)
rm(row_weight, THRESHOLD,  LR_wt, maxmin)
rm(KBI_CSI)
rm(list=ls(pattern = "jongsaja"))



5 making the longitudinal dataset

5.1 matching mobility code to shp code

5.1.1 shp code

dong.sf_resid_tb
:)   # A tibble: 1,123 × 3
:)      adm_nm                 adm_cd  college
:)    * <chr>                  <chr>     <dbl>
:)    1 종로구_사직동          1101053   0.808
:)    2 종로구_삼청동          1101054   0.706
:)    3 종로구_부암동          1101055   0.708
:)    4 종로구_평창동          1101056   0.779
:)    5 종로구_무악동          1101057   0.780
:)    6 종로구_교남동          1101058   0.675
:)    7 종로구_가회동          1101060   0.603
:)    8 종로구_종로1.2.3.4가동 1101061   0.682
:)    9 종로구_종로5.6가동     1101063   0.537
:)   10 종로구_이화동          1101064   0.645
:)   # ℹ 1,113 more rows

5.1.2 mobility_code

mobility_code <- readxl::read_excel("adm_codes/서울생활이동데이터_행정동코드_20210907.xlsx")
colnames(mobility_code) <- c("sd_cd", "sgg_cd", "adm_cd", "adm_name", "adm_nm")
mobility_code
:)   # A tibble: 1,152 × 5
:)      sd_cd sgg_cd  adm_cd adm_name        adm_nm                           
:)      <dbl>  <dbl>   <dbl> <chr>           <chr>                            
:)    1 11000  11010 1101053 사직동          서울특별시 종로구 사직동         
:)    2 11000  11010 1101054 삼청동          서울특별시 종로구 삼청동         
:)    3 11000  11010 1101055 부암동          서울특별시 종로구 부암동         
:)    4 11000  11010 1101056 평창동          서울특별시 종로구 평창동         
:)    5 11000  11010 1101057 무악동          서울특별시 종로구 무악동         
:)    6 11000  11010 1101058 교남동          서울특별시 종로구 교남동         
:)    7 11000  11010 1101060 가회동          서울특별시 종로구 가회동         
:)    8 11000  11010 1101061 종로1·2·3·4가동 서울특별시 종로구 종로1·2·3·4가동
:)    9 11000  11010 1101063 종로5·6가동     서울특별시 종로구 종로5·6가동    
:)   10 11000  11010 1101064 이화동          서울특별시 종로구 이화동         
:)   # ℹ 1,142 more rows
mobility_code <- mobility_code %>%
  filter(str_sub(adm_cd, 1, 5) != "23320") %>%                                  # 23320이 옹진군임
  filter(as.numeric(adm_cd) > 0  & str_length(adm_cd) > 5) %>%
  filter(str_sub(adm_cd, 1, 2) %in% c("11", "23", "31")) %>%
  select(adm_cd, adm_nm) %>%
  mutate(adm_nm = stringr::str_replace_all(adm_nm, "·", ".")) %>%
  mutate(adm_nm_sgg = str_split_i(adm_nm, " ", -2),
         adm_nm_emd = str_split_i(adm_nm, " ", -1)) %>%
  mutate(adm_nm_sgg = ifelse(adm_nm_sgg == "남구", "미추홀구", adm_nm_sgg)) %>%   # 시군구 단위 개명: 남구 -> 미추홀구 
  mutate(adm_nm = str_c(adm_nm_sgg, "_", adm_nm_emd)) %>%
  select(-c(adm_nm_sgg, adm_nm_emd))
mobility_code
:)   # A tibble: 1,131 × 2
:)       adm_cd adm_nm                
:)        <dbl> <chr>                 
:)    1 1101053 종로구_사직동         
:)    2 1101054 종로구_삼청동         
:)    3 1101055 종로구_부암동         
:)    4 1101056 종로구_평창동         
:)    5 1101057 종로구_무악동         
:)    6 1101058 종로구_교남동         
:)    7 1101060 종로구_가회동         
:)    8 1101061 종로구_종로1.2.3.4가동
:)    9 1101063 종로구_종로5.6가동    
:)   10 1101064 종로구_이화동         
:)   # ℹ 1,121 more rows

5.1.3 matching

5.1.3.1 matched

mobility_code_1 <- mobility_code %>%
  left_join(dong.sf_resid_tb, by = "adm_nm") %>%
  rename(adm_cd_mb = adm_cd.x,
         adm_cd_shp = adm_cd.y) %>%
  filter(!is.na(college))
mobility_code_1
:)   # A tibble: 1,087 × 4
:)      adm_cd_mb adm_nm                 adm_cd_shp college
:)          <dbl> <chr>                  <chr>        <dbl>
:)    1   1101053 종로구_사직동          1101053      0.808
:)    2   1101054 종로구_삼청동          1101054      0.706
:)    3   1101055 종로구_부암동          1101055      0.708
:)    4   1101056 종로구_평창동          1101056      0.779
:)    5   1101057 종로구_무악동          1101057      0.780
:)    6   1101058 종로구_교남동          1101058      0.675
:)    7   1101060 종로구_가회동          1101060      0.603
:)    8   1101061 종로구_종로1.2.3.4가동 1101061      0.682
:)    9   1101063 종로구_종로5.6가동     1101063      0.537
:)   10   1101064 종로구_이화동          1101064      0.645
:)   # ℹ 1,077 more rows
#colSums(is.na(mobility_code_1))

5.1.3.2 unmatched(imputation)

mobility_code_2 <- mobility_code %>%
  left_join(dong.sf_resid_tb, by = "adm_nm") %>%
  rename(adm_cd_mb = adm_cd.x,
         adm_cd_shp = adm_cd.y) %>%
  filter(is.na(college))
mobility_code_2
:)   # A tibble: 44 × 4
:)      adm_cd_mb adm_nm             adm_cd_shp college
:)          <dbl> <chr>              <chr>        <dbl>
:)    1   2308069 서구_검단2동       <NA>            NA
:)    2   2308070 서구_검단3동       <NA>            NA
:)    3   2308071 서구_검단4동       <NA>            NA
:)    4   2308076 서구_검단1동       <NA>            NA
:)    5   2308077 서구_검단5동       <NA>            NA
:)    6   3101459 영통구_태장동      <NA>            NA
:)    7   3103053 의정부시_의정부3동 <NA>            NA
:)    8   3103061 의정부시_가능1동   <NA>            NA
:)    9   3105051 부천시_심곡2동     <NA>            NA
:)   10   3105052 부천시_심곡1동     <NA>            NA
:)   # ℹ 34 more rows
# 서구 검단1동, 검단2동, 검단3동, 검단4동, 검단5동 -> 검단동
# 영통구 태장동 -> 망포동
# 의정부시 의정부3동 -> 의정부1동
# 의정부시 가능1동 -> 가능동
# 부천시 심곡2동, 심곡1동, 심국3동, 원미2동, 소사동 -> 심곡동
# 부천시 원미1동, 역곡1동, 역곡2동, 춘의동, 도당동 -> 부천동
# 부천시 중동, 상동 -> 중동
# 부천시 중4동, 약대동, 중1동, 중2동, 중3동 -> 신중동
# 부천시 상2동, 상1동, 상3동 -> 상동
# 부천시 심곡본동, 심곡본1동, 송내1동, 송내2동 -> 대산동
# 부천시 소사본동, 소사본3동 -> 소사본동
# 부천시 괴안동, 범박동, 역곡3동 -> 범안동
# 부천시 성각동, 고강본동, 고강1동 -> 성곡동
# 부천시 오정동, 원종1동, 원종2동, 신흥동 -> 오정동
# 나머지 신도동, 퇴계원면, 동백동, 영덕동, 광남동은 버림 
# 부천시 참고: https://wehagothelp.zendesk.com/hc/ko/articles/360000329042-%EA%B2%BD%EA%B8%B0%EB%8F%84-%EB%B6%80%EC%B2%9C%EC%8B%9C-%ED%96%89%EC%A0%95%EA%B8%B0%EA%B4%80-%EC%BD%94%EB%93%9C-%EB%B0%8F-%EA%B4%80%ED%95%A0%EA%B5%AC%EC%97%AD-%EB%B2%95%EC%A0%95%EB%8F%99-%EB%B3%80%EA%B2%BD%EC%9C%BC%EB%A1%9C-%EC%9D%B8%ED%95%9C-%EC%97%85%EB%8D%B0%EC%9D%B4%ED%8A%B8-%EC%95%88%EB%82%B4

mobility_code_2 <- mobility_code_2 %>%
  mutate(adm_nm_temp = str_split_i(adm_nm, "_", 2)) %>%
  mutate(adm_nm_temp = ifelse(adm_nm_temp %in% c("검단1동", "검단2동", "검단3동", "검단4동", "검단5동"), "검단동",
                         ifelse(adm_nm_temp %in% "태장동", "망포1동",
                                ifelse(adm_nm_temp %in% "의정부3동", "의정부1동", adm_nm_temp)))) %>%
  mutate(adm_nm_temp = ifelse(adm_nm_temp %in% c("가능1동"), "가능동",
                         ifelse(adm_nm_temp %in% c("심곡2동", "심곡1동", "심곡3동", "원미2동", "소사동"), "심곡동",
                                ifelse(adm_nm_temp %in% c("원미1동", "역곡1동", "역곡2동", "춘의동", "도당동"), "부천동", adm_nm_temp)))) %>%
  mutate(adm_nm_temp = ifelse(adm_nm_temp %in% c("중동", "상동"), "중동",
                         ifelse(adm_nm_temp %in% c("중4동", "약대동", "중1동", "중2동", "중3동"), "신중동",
                                ifelse(adm_nm_temp %in% c("상2동", "상1동", "상3동"), "상동", adm_nm_temp)))) %>%
  mutate(adm_nm_temp = ifelse(adm_nm_temp %in% c("심곡본동", "심곡본1동", "송내1동", "송내2동"), "대산동",
                         ifelse(adm_nm_temp %in% c("소사본동", "소사본3동"), "소사본동",
                                ifelse(adm_nm_temp %in% c("괴안동", "범박동", "역곡3동"), "범안동", adm_nm_temp)))) %>%
  mutate(adm_nm_temp = ifelse(adm_nm_temp %in% c("성각동", "고강본동", "고강1동"), "성곡동",
                         ifelse(adm_nm_temp %in% c("오정동", "원종1동", "원종2동", "신흥동"), "오정동", adm_nm_temp))) %>%
  
  mutate(adm_nm = str_c(str_split_i(adm_nm, "_", 1), 
                        "_",
                        adm_nm_temp)) %>%
  filter(!adm_nm_temp %in% c("신도동", "퇴계원면", "동백동", "영덕동", "광남동")) %>%
  select(-adm_nm_temp)
mobility_code_2
:)   # A tibble: 39 × 4
:)      adm_cd_mb adm_nm             adm_cd_shp college
:)          <dbl> <chr>              <chr>        <dbl>
:)    1   2308069 서구_검단동        <NA>            NA
:)    2   2308070 서구_검단동        <NA>            NA
:)    3   2308071 서구_검단동        <NA>            NA
:)    4   2308076 서구_검단동        <NA>            NA
:)    5   2308077 서구_검단동        <NA>            NA
:)    6   3101459 영통구_망포1동     <NA>            NA
:)    7   3103053 의정부시_의정부1동 <NA>            NA
:)    8   3103061 의정부시_가능동    <NA>            NA
:)    9   3105051 부천시_심곡동      <NA>            NA
:)   10   3105052 부천시_심곡동      <NA>            NA
:)   # ℹ 29 more rows
colSums(is.na(mobility_code_2))
:)    adm_cd_mb     adm_nm adm_cd_shp    college 
:)            0          0         39         39
mobility_code_2 <- mobility_code_2 %>%
  select(adm_cd_mb, adm_nm) %>%
  left_join(dong.sf_resid_tb, by = "adm_nm") %>%
  rename(adm_cd_shp = adm_cd)
mobility_code_2
:)   # A tibble: 39 × 4
:)      adm_cd_mb adm_nm             adm_cd_shp college
:)          <dbl> <chr>              <chr>        <dbl>
:)    1   2308069 서구_검단동        2308080      0.326
:)    2   2308070 서구_검단동        2308080      0.326
:)    3   2308071 서구_검단동        2308080      0.326
:)    4   2308076 서구_검단동        2308080      0.326
:)    5   2308077 서구_검단동        2308080      0.326
:)    6   3101459 영통구_망포1동     3101467      0.653
:)    7   3103053 의정부시_의정부1동 3103069      0.276
:)    8   3103061 의정부시_가능동    3103068      0.253
:)    9   3105051 부천시_심곡동      3105087      0.269
:)   10   3105052 부천시_심곡동      3105087      0.269
:)   # ℹ 29 more rows
colSums(is.na(mobility_code_2))
:)    adm_cd_mb     adm_nm adm_cd_shp    college 
:)            0          0          0          0

5.1.3.3 integrate

mobility_code <- mobility_code_1 %>%
  bind_rows(mobility_code_2)
mobility_code
:)   # A tibble: 1,126 × 4
:)      adm_cd_mb adm_nm                 adm_cd_shp college
:)          <dbl> <chr>                  <chr>        <dbl>
:)    1   1101053 종로구_사직동          1101053      0.808
:)    2   1101054 종로구_삼청동          1101054      0.706
:)    3   1101055 종로구_부암동          1101055      0.708
:)    4   1101056 종로구_평창동          1101056      0.779
:)    5   1101057 종로구_무악동          1101057      0.780
:)    6   1101058 종로구_교남동          1101058      0.675
:)    7   1101060 종로구_가회동          1101060      0.603
:)    8   1101061 종로구_종로1.2.3.4가동 1101061      0.682
:)    9   1101063 종로구_종로5.6가동     1101063      0.537
:)   10   1101064 종로구_이화동          1101064      0.645
:)   # ℹ 1,116 more rows
colSums(is.na(mobility_code))
:)    adm_cd_mb     adm_nm adm_cd_shp    college 
:)            0          0          0          0
mobility_code <- mobility_code %>%
  mutate(adm_cd_mb = as.character(adm_cd_mb),
         adm_cd_shp = as.character(adm_cd_shp))
mobility_code
:)   # A tibble: 1,126 × 4
:)      adm_cd_mb adm_nm                 adm_cd_shp college
:)      <chr>     <chr>                  <chr>        <dbl>
:)    1 1101053   종로구_사직동          1101053      0.808
:)    2 1101054   종로구_삼청동          1101054      0.706
:)    3 1101055   종로구_부암동          1101055      0.708
:)    4 1101056   종로구_평창동          1101056      0.779
:)    5 1101057   종로구_무악동          1101057      0.780
:)    6 1101058   종로구_교남동          1101058      0.675
:)    7 1101060   종로구_가회동          1101060      0.603
:)    8 1101061   종로구_종로1.2.3.4가동 1101061      0.682
:)    9 1101063   종로구_종로5.6가동     1101063      0.537
:)   10 1101064   종로구_이화동          1101064      0.645
:)   # ℹ 1,116 more rows
# 참고용, 이상 없음
mobility_code %>%
  filter(adm_cd_mb != adm_cd_shp)
:)   # A tibble: 77 × 4
:)      adm_cd_mb adm_nm           adm_cd_shp college
:)      <chr>     <chr>            <chr>        <dbl>
:)    1 2303052   미추홀구_숭의2동 2309052      0.184
:)    2 2303054   미추홀구_숭의4동 2309054      0.245
:)    3 2303056   미추홀구_용현2동 2309056      0.340
:)    4 2303057   미추홀구_용현3동 2309057      0.242
:)    5 2303059   미추홀구_용현5동 2309059      0.340
:)    6 2303060   미추홀구_학익1동 2309060      0.364
:)    7 2303061   미추홀구_학익2동 2309061      0.361
:)    8 2303062   미추홀구_도화1동 2309062      0.245
:)    9 2303065   미추홀구_주안1동 2309065      0.272
:)   10 2303066   미추홀구_주안2동 2309066      0.216
:)   # ℹ 67 more rows
rm(mobility_code_1, mobility_code_2)
rm(list=ls(pattern = "mean_"))

5.2 stacking flow data

FI <- paste0("//192.168.0.22/Public/JaegeonLee_Personal/data_seoulmobility/", 
             list.files(path = "//192.168.0.22/Public/JaegeonLee_Personal/data_seoulmobility/"))

{r eval = FALSE} for (I in 1:length(FI)) { print(I)

fi <- as.character(FI[I])  
print(fi)

fi_within <- paste0(fi, "/", list.files(path = fi))
#print(fi_within)

for (i in 8:10) {
    
    print(fi_within[i])
    
    temp <- read_csv(as.character(fi_within[i]), locale=locale('ko',encoding='euc-kr'))

    colnames(temp) <- c("DEPRT_YM", "DAYOFWEEK", "DEPRT_HOUR", "DEPRTP", "DESTNTN", "GENDER", "AGE_GR", "FLOW_TYPE", "TRVL_TIME", "LIFE_FLPOP")
    
    temp <- temp %>%
        mutate_at(1:9, as.character) %>%
        mutate(LIFE_FLPOP = replace(LIFE_FLPOP, LIFE_FLPOP=="*", "2")) %>%
        mutate(LIFE_FLPOP = as.numeric(LIFE_FLPOP)) %>%
        filter(str_sub(DEPRTP, 1, 2) %in% c("11", "23", "31"),
               str_sub(DESTNTN, 1, 2) == "11") %>%
        filter(FLOW_TYPE == "HW") %>%
        filter(DAYOFWEEK %in% c("월", "화", "수", "목", "금")) %>%
        filter(as.numeric(AGE_GR) >= 20 & as.numeric(AGE_GR) < 60) 
    
    if (i == 8){
      data <- plyr::rbind.fill(temp)
      rm(temp) # 부산물 삭제
    } else {
      data <- plyr::rbind.fill(data, temp)
      rm(temp) #부산물 삭제
    }            
    
    #print(data)
}

# edit flow data
data <- data %>%
        group_by(DEPRTP, DESTNTN, GENDER, AGE_GR, TRVL_TIME) %>%                               
        summarise(LIFE_FLPOP = sum(LIFE_FLPOP)) 

# save
# indexing
if (I < 10){
      I <- as.character(paste0("0", I))
    } else {
      I <- as.character(I)
    } 
# to files
data %>%
    write_excel_csv(paste0("//192.168.0.22/Public/JaegeonLee_Personal/data_seoulmobility_abbreviated_20230528/",
                           I, 
                           ".csv"))

rm(data)

}

5.3 stacking time-varying predictors

5.3.1 pilot

temp <- read_csv("//192.168.0.22/Public/JaegeonLee_Personal/data_seoulmobility_abbreviated_20230528/01.csv", 
                 locale=locale('ko', encoding = 'UTF-8'))
temp
:)   # A tibble: 1,559,180 × 6
:)       DEPRTP DESTNTN GENDER AGE_GR TRVL_TIME LIFE_FLPOP
:)        <dbl>   <dbl> <chr>   <dbl>     <dbl>      <dbl>
:)    1 1101053 1101053 F          20        10       4   
:)    2 1101053 1101053 F          25        10     299.  
:)    3 1101053 1101053 F          25        20      26.5 
:)    4 1101053 1101053 F          30        10     209.  
:)    5 1101053 1101053 F          35        10     438.  
:)    6 1101053 1101053 F          35        20       3.01
:)    7 1101053 1101053 F          35        40       6.01
:)    8 1101053 1101053 F          40        10     143.  
:)    9 1101053 1101053 F          40        20      23.7 
:)   10 1101053 1101053 F          45        10     164.  
:)   # ℹ 1,559,170 more rows
temp <- temp %>%
  mutate(DEPRTP = as.character(DEPRTP),
         DESTNTN = as.character(DESTNTN)) %>%
  group_by(DEPRTP, DESTNTN, TRVL_TIME, LIFE_FLPOP) %>%
  summarise(LIFE_FLPOP = sum(LIFE_FLPOP)) %>%
  ungroup()
temp
:)   # A tibble: 1,541,030 × 4
:)      DEPRTP  DESTNTN TRVL_TIME LIFE_FLPOP
:)      <chr>   <chr>       <dbl>      <dbl>
:)    1 1101053 1101053        10        4  
:)    2 1101053 1101053        10       44.2
:)    3 1101053 1101053        10       84.3
:)    4 1101053 1101053        10       87.0
:)    5 1101053 1101053        10       87.6
:)    6 1101053 1101053        10       89.8
:)    7 1101053 1101053        10      109. 
:)    8 1101053 1101053        10      143. 
:)    9 1101053 1101053        10      164. 
:)   10 1101053 1101053        10      209. 
:)   # ℹ 1,541,020 more rows
# DEPRTP
temp <- temp %>%
  left_join(mobility_code, by = c("DEPRTP" = "adm_cd_mb")) %>%
  rename(H_adm_nm = adm_nm,
         H_adm_cd_shp = adm_cd_shp) %>%
  select(H_adm_nm, H_adm_cd_shp, DESTNTN, TRVL_TIME, LIFE_FLPOP, college) %>%

# DESTNTN
  left_join(membership_info_eng, by = c("DESTNTN" = "adm_cd")) %>%
  rename(W_commune_nm = name,
         W_adm_cd_shp = DESTNTN) %>%
  select(H_adm_nm, H_adm_cd_shp, W_commune_nm, W_adm_cd_shp, TRVL_TIME, LIFE_FLPOP, college) %>%
  
# DESTNTN aggregate
  group_by(H_adm_nm, H_adm_cd_shp, W_commune_nm) %>%
  summarise(TRVL_TIME = weighted.mean(TRVL_TIME, LIFE_FLPOP),
            LIFE_FLPOP = sum(LIFE_FLPOP),
            college = mean(college)) %>%
  ungroup() 
#%>%
#  mutate(Time = str_c(I))
temp
:)   # A tibble: 48,487 × 6
:)      H_adm_nm      H_adm_cd_shp W_commune_nm TRVL_TIME LIFE_FLPOP college
:)      <chr>         <chr>        <chr>            <dbl>      <dbl>   <dbl>
:)    1 가평군_가평읍 3137011      Anam              47.0       6.56   0.360
:)    2 가평군_가평읍 3137011      Bangbae           47.8      45.4    0.360
:)    3 가평군_가평읍 3137011      Banghak           30        18.1    0.360
:)    4 가평군_가평읍 3137011      Bangi             37.3     123.     0.360
:)    5 가평군_가평읍 3137011      Changshin         34.5     135.     0.360
:)    6 가평군_가평읍 3137011      Cheongdam         47.5     134.     0.360
:)    7 가평군_가평읍 3137011      Daechi            34.9     123.     0.360
:)    8 가평군_가평읍 3137011      Daehak            60         2      0.360
:)    9 가평군_가평읍 3137011      Gil               20        24.1    0.360
:)   10 가평군_가평읍 3137011      Gongneung         35.5     101.     0.360
:)   # ℹ 48,477 more rows

5.3.2 top destinations

#temp %>%
#  group_by(W_commune_nm) %>%
#  summarise(total_inflow = sum(LIFE_FLPOP)) %>%
#  arrange(desc(total_inflow))
#temp %>%
#  group_by(W_commune_nm) %>%
#  summarise(total_inflow = sum(LIFE_FLPOP)) %>%
#  arrange(desc(total_inflow)) %>%
#  write_excel_csv("data_industrial/data_topdestinations_20230608.csv")

5.3.3 import

FI <- paste0("//192.168.0.22/Public/JaegeonLee_Personal/data_seoulmobility_abbreviated_20230528/", 
             list.files(path = "//192.168.0.22/Public/JaegeonLee_Personal/data_seoulmobility_abbreviated_20230528/"))

5.3.4 binding rows

for (I in 1:length(FI)) { # in 1:length(FI) 임을 잊지말것 print(I)

temp <- read_csv(as.character(FI[I]), locale=locale('ko', encoding = 'UTF-8'))

if (I == 1){
  temp <- temp %>%
    mutate(DEPRTP = as.character(DEPRTP),
           DESTNTN = as.character(DESTNTN)) %>%
    group_by(DEPRTP, DESTNTN, TRVL_TIME, LIFE_FLPOP) %>%
    summarise(LIFE_FLPOP = sum(LIFE_FLPOP)) %>%
    ungroup()

  # DEPRTP
  temp <- temp %>%
    left_join(mobility_code, by = c("DEPRTP" = "adm_cd_mb")) %>%
    rename(H_adm_nm = adm_nm,
           H_adm_cd_shp = adm_cd_shp) %>%
    select(H_adm_nm, H_adm_cd_shp, DESTNTN, TRVL_TIME, LIFE_FLPOP, college) %>%
  
  # DESTNTN
    left_join(membership_info_eng, by = c("DESTNTN" = "adm_cd")) %>%
    rename(W_commune_nm = name,
           W_adm_cd_shp = DESTNTN) %>%
    select(H_adm_nm, H_adm_cd_shp, W_commune_nm, W_adm_cd_shp, TRVL_TIME, LIFE_FLPOP, college) %>%
    
  # DESTNTN aggregate
    group_by(H_adm_nm, H_adm_cd_shp, W_commune_nm) %>%
    summarise(TRVL_TIME = weighted.mean(TRVL_TIME, LIFE_FLPOP),
              LIFE_FLPOP = sum(LIFE_FLPOP),
              college = mean(college)) %>%
    ungroup() %>%
    mutate(Time = str_c(I))
  
    head(temp)

  data <- temp 
    rm(temp) 
    print("here")
      
    } 
else {
  temp <- temp %>%
    mutate(DEPRTP = as.character(DEPRTP),
           DESTNTN = as.character(DESTNTN)) %>%
    group_by(DEPRTP, DESTNTN, TRVL_TIME, LIFE_FLPOP) %>%
    summarise(LIFE_FLPOP = sum(LIFE_FLPOP)) %>%
    ungroup()

  # DEPRTP
  temp <- temp %>%
    left_join(mobility_code, by = c("DEPRTP" = "adm_cd_mb")) %>%
    rename(H_adm_nm = adm_nm,
           H_adm_cd_shp = adm_cd_shp) %>%
    select(H_adm_nm, H_adm_cd_shp, DESTNTN, TRVL_TIME, LIFE_FLPOP, college) %>%
  
  # DESTNTN
    left_join(membership_info_eng, by = c("DESTNTN" = "adm_cd")) %>%
    rename(W_commune_nm = name,
           W_adm_cd_shp = DESTNTN) %>%
    select(H_adm_nm, H_adm_cd_shp, W_commune_nm, W_adm_cd_shp, TRVL_TIME, LIFE_FLPOP, college) %>%
    
  # DESTNTN aggregate
    group_by(H_adm_nm, H_adm_cd_shp, W_commune_nm) %>%
    summarise(TRVL_TIME = weighted.mean(TRVL_TIME, LIFE_FLPOP),
              LIFE_FLPOP = sum(LIFE_FLPOP),
              college = mean(college)) %>%
    ungroup() %>%
    mutate(Time = str_c(I))
  
    head(temp)
    
  data <- data %>%
        dplyr::bind_rows(temp)
    rm(temp) 
    print("here")
    }

}

#data %>%
#  write_excel_csv("data_longitudinal/flow_30months_20230606.csv")

5.4 joining time-varying predictors

5.4.1 set up

#data <- read_csv("data_longitudinal/flow_30months_20230528.csv")
#data
#data <- data %>%  
#    mutate(H_adm_nm = as.character(H_adm_nm),
#           H_adm_cd_shp = as.character(H_adm_cd_shp)) %>%
#    rename(time_distance = TRVL_TIME,
#           flow = LIFE_FLPOP) %>%
#    mutate(hw_link = str_c(H_adm_nm, " -> ", W_commune_nm)) %>%
#    mutate(Time_ts = case_when(Time == "1" ~  "2020-01-01",
#                            Time == "2" ~  "2020-02-01",
#                            Time == "3" ~  "2020-03-01",
#                            Time == "4" ~  "2020-04-01",
#                            Time == "5" ~  "2020-05-01",
#                            Time == "6" ~  "2020-06-01",
#                            Time == "7" ~  "2020-07-01",
#                            Time == "8" ~  "2020-08-01",
#                            Time == "9" ~  "2020-09-01",
#                            Time == "10" ~ "2020-10-01",
#                            Time == "11" ~ "2020-11-01",
#                            Time == "12" ~ "2020-12-01",
#                            Time == "13" ~ "2021-01-01",
#                            Time == "14" ~ "2021-02-01",
#                            Time == "15" ~ "2021-03-01",
#                            Time == "16" ~ "2021-04-01",
#                            Time == "17" ~ "2021-05-01",
#                            Time == "18" ~ "2021-06-01",
#                            Time == "19" ~ "2021-07-01",
#                            Time == "20" ~ "2021-08-01",
#                            Time == "21" ~ "2021-09-01",
#                            Time == "22" ~ "2021-10-01",
#                            Time == "23" ~ "2021-11-01",
#                            Time == "24" ~ "2021-12-01",
#                            Time == "25" ~ "2022-01-01",
#                            Time == "26" ~ "2022-02-01",
#                            Time == "27" ~ "2022-03-01",
#                            Time == "28" ~ "2022-04-01",
#                            Time == "29" ~ "2022-05-01",
#                            Time == "30" ~ "2022-06-01",
#                            )) %>%
#    mutate(Time_ts = as.character(Time_ts)) %>%
#    #arrange(home, work) %>%
#    mutate(Time_ts = tsibble::yearmonth(as.Date(Time_ts))) %>%
#    relocate(Time_ts, H_adm_nm, W_commune_nm, time_distance, college, flow) 
#data

5.4.2 covid cases

#covid <- readxl::read_xlsx("data_covid/seoul_covid.xlsx")
#
#covid <- covid %>%
#    mutate(ymd = str_sub(ymd, 1, 7)) %>%
#    group_by(ymd) %>%
#    summarise(new = sum(new)) %>%
#    mutate(ymd = ifelse(ymd %in% c("20.02.0", "20.02.1", "20.02.2"), "2020.01", ymd)) %>%
#    group_by(ymd) %>%
#    summarise(new = sum(new)) %>%
#    mutate(ymd = str_c(str_sub(ymd, 1, 4),
#                       "-",
#                       str_sub(ymd, 6, 7),
#                       "-01")) %>%
#    mutate(ymd = yearmonth(as.Date(ymd))) %>%
#    slice(1:30)
#
#data <- data %>%
#    left_join(covid, by = c("Time_ts" = "ymd"))
#data
#data %>%
#  write_excel_csv("data_longitudinal/flow_30months_concatenated_20230528.csv")

5.5 filter out flows not heading to employment centers

5.5.1 set up

data <- read_csv("data_longitudinal/flow_30months_concatenated_20230528.csv") %>%  
    mutate(H_adm_nm = as.character(H_adm_nm),
           H_adm_cd_shp = as.character(H_adm_cd_shp)) %>%
    mutate(W_commune_nm = ifelse(W_commune_nm == "Myeong", "Myeongdong", W_commune_nm))
data
:)   # A tibble: 1,439,415 × 10
:)      Time_ts H_adm_nm      W_commune_nm time_distance college   flow H_adm_cd_shp  Time hw_link                      new
:)      <chr>   <chr>         <chr>                <dbl>   <dbl>  <dbl> <chr>        <dbl> <chr>                      <dbl>
:)    1 2020 1  가평군_가평읍 Anam                  47.0   0.360   6.56 3137011          1 가평군_가평읍 -> Anam          0
:)    2 2020 1  가평군_가평읍 Bangbae               47.8   0.360  45.4  3137011          1 가평군_가평읍 -> Bangbae       0
:)    3 2020 1  가평군_가평읍 Banghak               30     0.360  18.1  3137011          1 가평군_가평읍 -> Banghak       0
:)    4 2020 1  가평군_가평읍 Bangi                 37.3   0.360 123.   3137011          1 가평군_가평읍 -> Bangi         0
:)    5 2020 1  가평군_가평읍 Changshin             34.5   0.360 135.   3137011          1 가평군_가평읍 -> Changshin     0
:)    6 2020 1  가평군_가평읍 Cheongdam             47.5   0.360 134.   3137011          1 가평군_가평읍 -> Cheongdam     0
:)    7 2020 1  가평군_가평읍 Daechi                34.9   0.360 123.   3137011          1 가평군_가평읍 -> Daechi        0
:)    8 2020 1  가평군_가평읍 Daehak                60     0.360   2    3137011          1 가평군_가평읍 -> Daehak        0
:)    9 2020 1  가평군_가평읍 Gil                   20     0.360  24.1  3137011          1 가평군_가평읍 -> Gil           0
:)   10 2020 1  가평군_가평읍 Gongneung             35.5   0.360 101.   3137011          1 가평군_가평읍 -> Gongneung     0
:)   # ℹ 1,439,405 more rows
employment_centers <- dong.sf_commune_filtered %>%
  st_drop_geometry() %>%
  pull(name)
employment_centers
:)    [1] "Cheongdam"    "Daechi"       "Guro"         "Gwanghui"     "Jamsil"       "Jongno"       "Munjeong"     "Myeongdong"   "Samseong"     "Seocho"       "Seongsu"      "Sinchon"      "Yeoksam"      "Yeongdeungpo" "Yeoui"
data <- data %>%
  filter(W_commune_nm %in% employment_centers)
data
:)   # A tibble: 444,926 × 10
:)      Time_ts H_adm_nm      W_commune_nm time_distance college   flow H_adm_cd_shp  Time hw_link                      new
:)      <chr>   <chr>         <chr>                <dbl>   <dbl>  <dbl> <chr>        <dbl> <chr>                      <dbl>
:)    1 2020 1  가평군_가평읍 Cheongdam             47.5   0.360 134.   3137011          1 가평군_가평읍 -> Cheongdam     0
:)    2 2020 1  가평군_가평읍 Daechi                34.9   0.360 123.   3137011          1 가평군_가평읍 -> Daechi        0
:)    3 2020 1  가평군_가평읍 Guro                  42.8   0.360  34.7  3137011          1 가평군_가평읍 -> Guro          0
:)    4 2020 1  가평군_가평읍 Gwanghui              40     0.360 131.   3137011          1 가평군_가평읍 -> Gwanghui      0
:)    5 2020 1  가평군_가평읍 Jamsil                46.6   0.360   9.13 3137011          1 가평군_가평읍 -> Jamsil        0
:)    6 2020 1  가평군_가평읍 Jongno                37.1   0.360 118.   3137011          1 가평군_가평읍 -> Jongno        0
:)    7 2020 1  가평군_가평읍 Munjeong              71.6   0.360  12.6  3137011          1 가평군_가평읍 -> Munjeong      0
:)    8 2020 1  가평군_가평읍 Myeongdong            54.3   0.360  61.1  3137011          1 가평군_가평읍 -> Myeong        0
:)    9 2020 1  가평군_가평읍 Samseong              61.9   0.360  46.2  3137011          1 가평군_가평읍 -> Samseong      0
:)   10 2020 1  가평군_가평읍 Seocho                43.3   0.360 100.   3137011          1 가평군_가평읍 -> Seocho        0
:)   # ℹ 444,916 more rows

5.6 filter out void flows

5.6.1 NA

420 = 30(months) * 14(employment centers). 즉, 출발지역 하나만 놓친 정도로 준수함.

data %>%
  is.na() %>%
  colSums()
:)         Time_ts      H_adm_nm  W_commune_nm time_distance       college          flow  H_adm_cd_shp          Time       hw_link           new 
:)               0           450             0             0           450             0           450             0           450             0
data <- data %>%
  filter(!is.na(college))
data
:)   # A tibble: 444,476 × 10
:)      Time_ts H_adm_nm      W_commune_nm time_distance college   flow H_adm_cd_shp  Time hw_link                      new
:)      <chr>   <chr>         <chr>                <dbl>   <dbl>  <dbl> <chr>        <dbl> <chr>                      <dbl>
:)    1 2020 1  가평군_가평읍 Cheongdam             47.5   0.360 134.   3137011          1 가평군_가평읍 -> Cheongdam     0
:)    2 2020 1  가평군_가평읍 Daechi                34.9   0.360 123.   3137011          1 가평군_가평읍 -> Daechi        0
:)    3 2020 1  가평군_가평읍 Guro                  42.8   0.360  34.7  3137011          1 가평군_가평읍 -> Guro          0
:)    4 2020 1  가평군_가평읍 Gwanghui              40     0.360 131.   3137011          1 가평군_가평읍 -> Gwanghui      0
:)    5 2020 1  가평군_가평읍 Jamsil                46.6   0.360   9.13 3137011          1 가평군_가평읍 -> Jamsil        0
:)    6 2020 1  가평군_가평읍 Jongno                37.1   0.360 118.   3137011          1 가평군_가평읍 -> Jongno        0
:)    7 2020 1  가평군_가평읍 Munjeong              71.6   0.360  12.6  3137011          1 가평군_가평읍 -> Munjeong      0
:)    8 2020 1  가평군_가평읍 Myeongdong            54.3   0.360  61.1  3137011          1 가평군_가평읍 -> Myeong        0
:)    9 2020 1  가평군_가평읍 Samseong              61.9   0.360  46.2  3137011          1 가평군_가평읍 -> Samseong      0
:)   10 2020 1  가평군_가평읍 Seocho                43.3   0.360 100.   3137011          1 가평군_가평읍 -> Seocho        0
:)   # ℹ 444,466 more rows

5.6.2 missing values

hw_link 별 count가 30이 아니면, 그 hw_link 계정 자체를 지움 20,149 -> 16,390

hw_link_survived <- data %>%
  group_by(hw_link) %>%
  count() %>%
  filter(n == 30) %>%
  pull(hw_link)
hw_link_survived
:)    [1] "가평군_가평읍 -> Cheongdam"     "가평군_가평읍 -> Guro"          "가평군_가평읍 -> Gwanghui"      "가평군_가평읍 -> Jongno"        "가평군_가평읍 -> Myeong"        "가평군_가평읍 -> Seocho"        "가평군_가평읍 -> Seongsu"       "가평군_가평읍 -> Yeoksam"       "가평군_가평읍 -> Yeoui"         "가평군_상면 -> Samseong"        "가평군_상면 -> Seocho"          "가평군_설악면 -> Gwanghui"      "가평군_설악면 -> Jamsil"        "가평군_설악면 -> Jongno"       
:)   [15] "가평군_설악면 -> Samseong"      "가평군_설악면 -> Seocho"        "가평군_설악면 -> Yeoksam"       "가평군_설악면 -> Yeongdeungpo"  "가평군_청평면 -> Cheongdam"     "가평군_청평면 -> Gwanghui"      "가평군_청평면 -> Jamsil"        "가평군_청평면 -> Jongno"        "가평군_청평면 -> Seocho"        "가평군_청평면 -> Seongsu"       "가평군_청평면 -> Yeoksam"       "강남구_개포1동 -> Cheongdam"    "강남구_개포1동 -> Daechi"       "강남구_개포1동 -> Guro"        
:)   [29] "강남구_개포1동 -> Gwanghui"     "강남구_개포1동 -> Jamsil"       "강남구_개포1동 -> Jongno"       "강남구_개포1동 -> Munjeong"     "강남구_개포1동 -> Myeong"       "강남구_개포1동 -> Samseong"     "강남구_개포1동 -> Seocho"       "강남구_개포1동 -> Seongsu"      "강남구_개포1동 -> Sinchon"      "강남구_개포1동 -> Yeoksam"      "강남구_개포1동 -> Yeongdeungpo" "강남구_개포1동 -> Yeoui"        "강남구_개포2동 -> Cheongdam"    "강남구_개포2동 -> Daechi"      
:)   [43] "강남구_개포2동 -> Guro"         "강남구_개포2동 -> Gwanghui"     "강남구_개포2동 -> Jamsil"       "강남구_개포2동 -> Jongno"       "강남구_개포2동 -> Munjeong"     "강남구_개포2동 -> Myeong"       "강남구_개포2동 -> Samseong"     "강남구_개포2동 -> Seocho"       "강남구_개포2동 -> Seongsu"      "강남구_개포2동 -> Sinchon"      "강남구_개포2동 -> Yeoksam"      "강남구_개포2동 -> Yeongdeungpo" "강남구_개포2동 -> Yeoui"        "강남구_개포4동 -> Cheongdam"   
:)   [57] "강남구_개포4동 -> Daechi"       "강남구_개포4동 -> Guro"         "강남구_개포4동 -> Gwanghui"     "강남구_개포4동 -> Jamsil"       "강남구_개포4동 -> Jongno"       "강남구_개포4동 -> Munjeong"     "강남구_개포4동 -> Myeong"       "강남구_개포4동 -> Samseong"     "강남구_개포4동 -> Seocho"       "강남구_개포4동 -> Seongsu"      "강남구_개포4동 -> Sinchon"      "강남구_개포4동 -> Yeoksam"      "강남구_개포4동 -> Yeongdeungpo" "강남구_개포4동 -> Yeoui"       
:)   [71] "강남구_논현1동 -> Cheongdam"    "강남구_논현1동 -> Daechi"       "강남구_논현1동 -> Guro"         "강남구_논현1동 -> Gwanghui"     "강남구_논현1동 -> Jamsil"      
:)    [ reached getOption("max.print") -- omitted 13388 entries ]
data <- data %>% 
  filter(hw_link %in% hw_link_survived)
data
:)   # A tibble: 403,890 × 10
:)      Time_ts H_adm_nm      W_commune_nm time_distance college  flow H_adm_cd_shp  Time hw_link                      new
:)      <chr>   <chr>         <chr>                <dbl>   <dbl> <dbl> <chr>        <dbl> <chr>                      <dbl>
:)    1 2020 1  가평군_가평읍 Cheongdam             47.5   0.360 134.  3137011          1 가평군_가평읍 -> Cheongdam     0
:)    2 2020 1  가평군_가평읍 Guro                  42.8   0.360  34.7 3137011          1 가평군_가평읍 -> Guro          0
:)    3 2020 1  가평군_가평읍 Gwanghui              40     0.360 131.  3137011          1 가평군_가평읍 -> Gwanghui      0
:)    4 2020 1  가평군_가평읍 Jongno                37.1   0.360 118.  3137011          1 가평군_가평읍 -> Jongno        0
:)    5 2020 1  가평군_가평읍 Myeongdong            54.3   0.360  61.1 3137011          1 가평군_가평읍 -> Myeong        0
:)    6 2020 1  가평군_가평읍 Seocho                43.3   0.360 100.  3137011          1 가평군_가평읍 -> Seocho        0
:)    7 2020 1  가평군_가평읍 Seongsu               30.1   0.360  49.0 3137011          1 가평군_가평읍 -> Seongsu       0
:)    8 2020 1  가평군_가평읍 Yeoksam               43.3   0.360  20.7 3137011          1 가평군_가평읍 -> Yeoksam       0
:)    9 2020 1  가평군_가평읍 Yeoui                 36.9   0.360 103.  3137011          1 가평군_가평읍 -> Yeoui         0
:)   10 2020 1  가평군_상면   Samseong              40     0.397  71.7 3137033          1 가평군_상면 -> Samseong        0
:)   # ℹ 403,880 more rows

5.8 mapping origins that are effective

5.8.1 figure 4a

dong.sf_resid_filtered <- dong.sf_resid %>%
  filter(adm_nm %in% resid_effective)
dong.sf_resid_filtered
:)   Simple feature collection with 913 features and 4 fields
:)   Geometry type: GEOMETRY
:)   Dimension:     XY
:)   Bounding box:  xmin: 901000 ymin: 1890000 xmax: 1010000 ymax: 1990000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 913 × 5
:)      adm_nm                 adm_cd                                                                                 geometry college num_of_neighbors
:)    * <chr>                  <chr>                                                                        <MULTIPOLYGON [m]>   <dbl>            <int>
:)    1 종로구_사직동          1101053 (((953554 1953336, 953555 1953320, 953556 1953307, 953557 1953295, 953558 1953281, 9...   0.808                7
:)    2 종로구_삼청동          1101054 (((953844 1955492, 953859 1955490, 953902 1955493, 953912 1955493, 953916 1955492, 9...   0.706                7
:)    3 종로구_부암동          1101055 (((952490 1956549, 952498 1956533, 952501 1956525, 952501 1956524, 952492 1956515, 9...   0.708                7
:)    4 종로구_평창동          1101056 (((953684 1959210, 953665 1959132, 953647 1959057, 953651 1959043, 953672 1958971, 9...   0.779                9
:)    5 종로구_무악동          1101057 (((952298 1953540, 952325 1953508, 952329 1953500, 952338 1953484, 952339 1953482, 9...   0.780                7
:)    6 종로구_교남동          1101058 (((952572 1953259, 952573 1953256, 952575 1953250, 952577 1953241, 952580 1953234, 9...   0.675                6
:)    7 종로구_가회동          1101060 (((954895 1954615, 954888 1954592, 954865 1954592, 954856 1954592, 954838 1954563, 9...   0.603                4
:)    8 종로구_종로1.2.3.4가동 1101061 (((954918 1954372, 954926 1954362, 954932 1954355, 954937 1954352, 954949 1954346, 9...   0.682               10
:)    9 종로구_종로5.6가동     1101063 (((956607 1953150, 956607 1953148, 956607 1953146, 956607 1953144, 956607 1953139, 9...   0.537                8
:)   10 종로구_이화동          1101064 (((956366 1954112, 956372 1954108, 956379 1954108, 956379 1954108, 956408 1954108, 9...   0.645                6
:)   # ℹ 903 more rows

5.9 define college quartile

5.9.1 college quartile (SMA)

dong.sf_resid_filtered <- dong.sf_resid_filtered %>%
    mutate(col_qrt = case_when(
        quantile(college, 0.75) < college ~ "high(Q4)",
        quantile(college, 0.50) < college & college <= quantile(college, 0.75) ~ "middle-high(Q3)",
        quantile(college, 0.25) < college & college <= quantile(college, 0.50) ~ "middle-low(Q2)",
        college <= quantile(college, 0.25) ~ "low(Q1)")) %>%
    mutate(col_qrt = factor(col_qrt, levels = c("low(Q1)", "middle-low(Q2)", "middle-high(Q3)", "high(Q4)")))
dong.sf_resid_filtered
:)   Simple feature collection with 913 features and 5 fields
:)   Geometry type: GEOMETRY
:)   Dimension:     XY
:)   Bounding box:  xmin: 901000 ymin: 1890000 xmax: 1010000 ymax: 1990000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 913 × 6
:)      adm_nm                 adm_cd                                                                                 geometry college num_of_neighbors col_qrt        
:)    * <chr>                  <chr>                                                                        <MULTIPOLYGON [m]>   <dbl>            <int> <fct>          
:)    1 종로구_사직동          1101053 (((953554 1953336, 953555 1953320, 953556 1953307, 953557 1953295, 953558 1953281, 9...   0.808                7 high(Q4)       
:)    2 종로구_삼청동          1101054 (((953844 1955492, 953859 1955490, 953902 1955493, 953912 1955493, 953916 1955492, 9...   0.706                7 high(Q4)       
:)    3 종로구_부암동          1101055 (((952490 1956549, 952498 1956533, 952501 1956525, 952501 1956524, 952492 1956515, 9...   0.708                7 high(Q4)       
:)    4 종로구_평창동          1101056 (((953684 1959210, 953665 1959132, 953647 1959057, 953651 1959043, 953672 1958971, 9...   0.779                9 high(Q4)       
:)    5 종로구_무악동          1101057 (((952298 1953540, 952325 1953508, 952329 1953500, 952338 1953484, 952339 1953482, 9...   0.780                7 high(Q4)       
:)    6 종로구_교남동          1101058 (((952572 1953259, 952573 1953256, 952575 1953250, 952577 1953241, 952580 1953234, 9...   0.675                6 high(Q4)       
:)    7 종로구_가회동          1101060 (((954895 1954615, 954888 1954592, 954865 1954592, 954856 1954592, 954838 1954563, 9...   0.603                4 middle-high(Q3)
:)    8 종로구_종로1.2.3.4가동 1101061 (((954918 1954372, 954926 1954362, 954932 1954355, 954937 1954352, 954949 1954346, 9...   0.682               10 high(Q4)       
:)    9 종로구_종로5.6가동     1101063 (((956607 1953150, 956607 1953148, 956607 1953146, 956607 1953144, 956607 1953139, 9...   0.537                8 middle-high(Q3)
:)   10 종로구_이화동          1101064 (((956366 1954112, 956372 1954108, 956379 1954108, 956379 1954108, 956408 1954108, 9...   0.645                6 high(Q4)       
:)   # ℹ 903 more rows
summary(dong.sf_resid_filtered$college)
:)      Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
:)     0.144   0.352   0.482   0.497   0.626   0.946
summary(dong.sf_resid_filtered$college) * 100
:)      Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
:)      14.4    35.2    48.2    49.7    62.6    94.6

5.9.2 college quartile (within Seoul)

dong.sf_resid_filtered_withinseoul <- dong.sf_resid_filtered %>%
  st_drop_geometry() %>%
  select(adm_cd, adm_nm, college) %>%                                        # kosis 아닌 shp 기준의 코드만 남겨나
  filter(str_sub(adm_cd, 1, 2) == "11") %>%
  mutate(col_qrt_withinSeoul = case_when(
        quantile(college, 0.75) < college ~ "high(Q4)",
        quantile(college, 0.50) < college & college <= quantile(college, 0.75) ~ "middle-high(Q3)",
        quantile(college, 0.25) < college & college <= quantile(college, 0.50) ~ "middle-low(Q2)",
        college <= quantile(college, 0.25) ~ "low(Q1)")) %>%
    mutate(col_qrt_withinSeoul = factor(col_qrt_withinSeoul, levels = c("low(Q1)", "middle-low(Q2)", "middle-high(Q3)", "high(Q4)")))
dong.sf_resid_filtered_withinseoul
:)   # A tibble: 423 × 4
:)      adm_cd  adm_nm                 college col_qrt_withinSeoul
:)      <chr>   <chr>                    <dbl> <fct>              
:)    1 1101053 종로구_사직동            0.808 high(Q4)           
:)    2 1101054 종로구_삼청동            0.706 high(Q4)           
:)    3 1101055 종로구_부암동            0.708 high(Q4)           
:)    4 1101056 종로구_평창동            0.779 high(Q4)           
:)    5 1101057 종로구_무악동            0.780 high(Q4)           
:)    6 1101058 종로구_교남동            0.675 middle-high(Q3)    
:)    7 1101060 종로구_가회동            0.603 middle-high(Q3)    
:)    8 1101061 종로구_종로1.2.3.4가동   0.682 middle-high(Q3)    
:)    9 1101063 종로구_종로5.6가동       0.537 middle-low(Q2)     
:)   10 1101064 종로구_이화동            0.645 middle-high(Q3)    
:)   # ℹ 413 more rows

5.9.3 college quartile (outof Seoul)

dong.sf_resid_filtered_outofseoul <- dong.sf_resid_filtered %>%
  st_drop_geometry() %>%
  select(adm_cd, adm_nm, college) %>%                                        # kosis 아닌 shp 기준의 코드만 남겨나
  filter(str_sub(adm_cd, 1, 2) != "11") %>%
  mutate(col_qrt_outofSeoul = case_when(
        quantile(college, 0.75) < college ~ "high(Q4)",
        quantile(college, 0.50) < college & college <= quantile(college, 0.75) ~ "middle-high(Q3)",
        quantile(college, 0.25) < college & college <= quantile(college, 0.50) ~ "middle-low(Q2)",
        college <= quantile(college, 0.25) ~ "low(Q1)")) %>%
    mutate(col_qrt_outofSeoul = factor(col_qrt_outofSeoul, levels = c("low(Q1)", "middle-low(Q2)", "middle-high(Q3)", "high(Q4)")))
dong.sf_resid_filtered_outofseoul
:)   # A tibble: 490 × 4
:)      adm_cd  adm_nm         college col_qrt_outofSeoul
:)      <chr>   <chr>            <dbl> <fct>             
:)    1 2301053 중구_신포동      0.341 middle-low(Q2)    
:)    2 2301054 중구_신흥동      0.323 middle-low(Q2)    
:)    3 2301060 중구_북성동      0.284 low(Q1)           
:)    4 2301064 중구_운서동      0.489 middle-high(Q3)   
:)    5 2301065 중구_영종동      0.438 middle-high(Q3)   
:)    6 2301066 중구_영종1동     0.520 middle-high(Q3)   
:)    7 2302055 동구_송현1.2동   0.330 middle-low(Q2)    
:)    8 2302060 동구_송림3.5동   0.297 low(Q1)           
:)    9 2302063 동구_송림6동     0.273 low(Q1)           
:)   10 2304051 연수구_옥련1동   0.330 middle-low(Q2)    
:)   # ℹ 480 more rows

5.9.4 integrate

dong.sf_resid_filtered <- dong.sf_resid_filtered %>%
  left_join(dong.sf_resid_filtered_withinseoul, by = c("adm_cd", "adm_nm", "college")) %>%
  left_join(dong.sf_resid_filtered_outofseoul, by = c("adm_cd", "adm_nm", "college"))
dong.sf_resid_filtered
:)   Simple feature collection with 913 features and 7 fields
:)   Geometry type: GEOMETRY
:)   Dimension:     XY
:)   Bounding box:  xmin: 901000 ymin: 1890000 xmax: 1010000 ymax: 1990000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 913 × 8
:)      adm_nm                 adm_cd                                                                                 geometry college num_of_neighbors col_qrt         col_qrt_withinSeoul col_qrt_outofSeoul
:)      <chr>                  <chr>                                                                        <MULTIPOLYGON [m]>   <dbl>            <int> <fct>           <fct>               <fct>             
:)    1 종로구_사직동          1101053 (((953554 1953336, 953555 1953320, 953556 1953307, 953557 1953295, 953558 1953281, 9...   0.808                7 high(Q4)        high(Q4)            <NA>              
:)    2 종로구_삼청동          1101054 (((953844 1955492, 953859 1955490, 953902 1955493, 953912 1955493, 953916 1955492, 9...   0.706                7 high(Q4)        high(Q4)            <NA>              
:)    3 종로구_부암동          1101055 (((952490 1956549, 952498 1956533, 952501 1956525, 952501 1956524, 952492 1956515, 9...   0.708                7 high(Q4)        high(Q4)            <NA>              
:)    4 종로구_평창동          1101056 (((953684 1959210, 953665 1959132, 953647 1959057, 953651 1959043, 953672 1958971, 9...   0.779                9 high(Q4)        high(Q4)            <NA>              
:)    5 종로구_무악동          1101057 (((952298 1953540, 952325 1953508, 952329 1953500, 952338 1953484, 952339 1953482, 9...   0.780                7 high(Q4)        high(Q4)            <NA>              
:)    6 종로구_교남동          1101058 (((952572 1953259, 952573 1953256, 952575 1953250, 952577 1953241, 952580 1953234, 9...   0.675                6 high(Q4)        middle-high(Q3)     <NA>              
:)    7 종로구_가회동          1101060 (((954895 1954615, 954888 1954592, 954865 1954592, 954856 1954592, 954838 1954563, 9...   0.603                4 middle-high(Q3) middle-high(Q3)     <NA>              
:)    8 종로구_종로1.2.3.4가동 1101061 (((954918 1954372, 954926 1954362, 954932 1954355, 954937 1954352, 954949 1954346, 9...   0.682               10 high(Q4)        middle-high(Q3)     <NA>              
:)    9 종로구_종로5.6가동     1101063 (((956607 1953150, 956607 1953148, 956607 1953146, 956607 1953144, 956607 1953139, 9...   0.537                8 middle-high(Q3) middle-low(Q2)      <NA>              
:)   10 종로구_이화동          1101064 (((956366 1954112, 956372 1954108, 956379 1954108, 956379 1954108, 956408 1954108, 9...   0.645                6 high(Q4)        middle-high(Q3)     <NA>              
:)   # ℹ 903 more rows
rm(dong.sf_resid_filtered_withinseoul,
   dong.sf_resid_filtered_outofseoul)

5.10 joining time-invariant predictors

resid_feature <- dong.sf_resid_filtered %>%
  st_drop_geometry() %>%
  select(adm_nm, adm_cd, college, col_qrt, col_qrt_withinSeoul, col_qrt_outofSeoul)
emp_feature <- dong.sf_commune_filtered %>%
  st_drop_geometry() %>%
  select(name, cluster, total_emp, density_emp, cluster, share_of_KBI, share_of_CSI)
data_filtered <- data_filtered %>%
  select(-college) %>%
  left_join(resid_feature, by = c("H_adm_nm" = "adm_nm")) %>%
  left_join(emp_feature, by = c("W_commune_nm" = "name")) %>%
  relocate(Time_ts, H_adm_nm, W_commune_nm, flow, time_distance, cluster, college)
data_filtered  
:)   # A tibble: 302,940 × 20
:)      Time_ts H_adm_nm       W_commune_nm  flow time_distance cluster       college H_adm_cd_shp  Time hw_link                       new flow_mean adm_cd  col_qrt  col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI
:)      <chr>   <chr>          <chr>        <dbl>         <dbl> <fct>           <dbl> <chr>        <dbl> <chr>                       <dbl>     <dbl> <chr>   <fct>    <fct>               <fct>                  <dbl>       <dbl>        <dbl>        <dbl>
:)    1 2020 1  강남구_개포1동 Cheongdam    1791.          17.0 Mixed           0.889 1123068          1 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890       212.          27.6         38.8
:)    2 2020 1  강남구_개포1동 Daechi       3265.          14.1 Mixed           0.889 1123068          1 강남구_개포1동 -> Daechi        0     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633       116.          29.2         31.3
:)    3 2020 1  강남구_개포1동 Guro          414.          31.4 Manufacturing   0.889 1123068          1 강남구_개포1동 -> Guro          0      322. 1123068 high(Q4) high(Q4)            <NA>                  336770       304.          36.3         25.9
:)    4 2020 1  강남구_개포1동 Gwanghui      344.          23.8 Manufacturing   0.889 1123068          1 강남구_개포1동 -> Gwanghui      0      356. 1123068 high(Q4) high(Q4)            <NA>                  159615       239.          14.8         44.7
:)    5 2020 1  강남구_개포1동 Jamsil        947.          17.0 Mixed           0.889 1123068          1 강남구_개포1동 -> Jamsil        0      615. 1123068 high(Q4) high(Q4)            <NA>                  123030       102.          27.1         40.1
:)    6 2020 1  강남구_개포1동 Jongno       1209.          24.7 Mixed           0.889 1123068          1 강남구_개포1동 -> Jongno        0      755. 1123068 high(Q4) high(Q4)            <NA>                  129040       203.          28.6         32.9
:)    7 2020 1  강남구_개포1동 Munjeong      451.          22.4 Manufacturing   0.889 1123068          1 강남구_개포1동 -> Munjeong      0      549. 1123068 high(Q4) high(Q4)            <NA>                  110472        98.5         24.9         32.7
:)    8 2020 1  강남구_개포1동 Myeongdong    870.          24.1 Financial       0.889 1123068          1 강남구_개포1동 -> Myeong        0      965. 1123068 high(Q4) high(Q4)            <NA>                  217113       773.          40.1         29.7
:)    9 2020 1  강남구_개포1동 Samseong     1763.          15.6 Professional    0.889 1123068          1 강남구_개포1동 -> Samseong      0     1255. 1123068 high(Q4) high(Q4)            <NA>                  120988       320.          31.0         37.8
:)   10 2020 1  강남구_개포1동 Seocho        837.          15.6 Professional    0.889 1123068          1 강남구_개포1동 -> Seocho        0      791. 1123068 high(Q4) high(Q4)            <NA>                  103264       236.          35.7         25.2
:)   # ℹ 302,930 more rows
rm(resid_feature, emp_feature)

5.11 export

data_filtered %>%
  write_excel_csv("data_longitudinal/data_filtered_20230823.csv")

5.12 clean your RAM

rm(emp_mode, covid, kosis_code, kosis_ingu_edu_emd_2020, kosis_sgg, p1, p2)



6 EDA

6.1 import data

data_filtered <- read_csv("data_longitudinal/data_filtered_20230823.csv")
data_filtered
:)   # A tibble: 302,940 × 20
:)      Time_ts H_adm_nm       W_commune_nm  flow time_distance cluster       college H_adm_cd_shp  Time hw_link                       new flow_mean  adm_cd col_qrt  col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI
:)      <chr>   <chr>          <chr>        <dbl>         <dbl> <chr>           <dbl>        <dbl> <dbl> <chr>                       <dbl>     <dbl>   <dbl> <chr>    <chr>               <chr>                  <dbl>       <dbl>        <dbl>        <dbl>
:)    1 2020 1  강남구_개포1동 Cheongdam    1791.          17.0 Mixed           0.889      1123068     1 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890       212.          27.6         38.8
:)    2 2020 1  강남구_개포1동 Daechi       3265.          14.1 Mixed           0.889      1123068     1 강남구_개포1동 -> Daechi        0     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633       116.          29.2         31.3
:)    3 2020 1  강남구_개포1동 Guro          414.          31.4 Manufacturing   0.889      1123068     1 강남구_개포1동 -> Guro          0      322. 1123068 high(Q4) high(Q4)            <NA>                  336770       304.          36.3         25.9
:)    4 2020 1  강남구_개포1동 Gwanghui      344.          23.8 Manufacturing   0.889      1123068     1 강남구_개포1동 -> Gwanghui      0      356. 1123068 high(Q4) high(Q4)            <NA>                  159615       239.          14.8         44.7
:)    5 2020 1  강남구_개포1동 Jamsil        947.          17.0 Mixed           0.889      1123068     1 강남구_개포1동 -> Jamsil        0      615. 1123068 high(Q4) high(Q4)            <NA>                  123030       102.          27.1         40.1
:)    6 2020 1  강남구_개포1동 Jongno       1209.          24.7 Mixed           0.889      1123068     1 강남구_개포1동 -> Jongno        0      755. 1123068 high(Q4) high(Q4)            <NA>                  129040       203.          28.6         32.9
:)    7 2020 1  강남구_개포1동 Munjeong      451.          22.4 Manufacturing   0.889      1123068     1 강남구_개포1동 -> Munjeong      0      549. 1123068 high(Q4) high(Q4)            <NA>                  110472        98.5         24.9         32.7
:)    8 2020 1  강남구_개포1동 Myeongdong    870.          24.1 Financial       0.889      1123068     1 강남구_개포1동 -> Myeong        0      965. 1123068 high(Q4) high(Q4)            <NA>                  217113       773.          40.1         29.7
:)    9 2020 1  강남구_개포1동 Samseong     1763.          15.6 Professional    0.889      1123068     1 강남구_개포1동 -> Samseong      0     1255. 1123068 high(Q4) high(Q4)            <NA>                  120988       320.          31.0         37.8
:)   10 2020 1  강남구_개포1동 Seocho        837.          15.6 Professional    0.889      1123068     1 강남구_개포1동 -> Seocho        0      791. 1123068 high(Q4) high(Q4)            <NA>                  103264       236.          35.7         25.2
:)   # ℹ 302,930 more rows
data_filtered_ts <- data_filtered %>%
    mutate(Time_ts = yearmonth(Time_ts)) %>%
    tsibble::as_tsibble(index = Time_ts, key = hw_link) %>%
    mutate(cluster = factor(cluster, levels = c("Manufacturing", "Mixed", "Professional", "Financial"))) %>%
    mutate(col_qrt = factor(col_qrt, 
                                   levels = c("low(Q1)", "middle-low(Q2)", "middle-high(Q3)", "high(Q4)")),
         col_qrt_withinSeoul = factor(col_qrt_withinSeoul, 
                                   levels = c("low(Q1)", "middle-low(Q2)", "middle-high(Q3)", "high(Q4)"))) %>%
  mutate(H_adm_cd_shp = as.character(H_adm_cd_shp))
data_filtered_ts 
:)   # A tsibble: 302,940 x 20 [1M]
:)   # Key:       hw_link [10,098]
:)      Time_ts H_adm_nm       W_commune_nm  flow time_distance cluster college H_adm_cd_shp  Time hw_link                       new flow_mean  adm_cd col_qrt  col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI
:)        <mth> <chr>          <chr>        <dbl>         <dbl> <fct>     <dbl> <chr>        <dbl> <chr>                       <dbl>     <dbl>   <dbl> <fct>    <fct>               <chr>                  <dbl>       <dbl>        <dbl>        <dbl>
:)    1  2020 1 강남구_개포1동 Cheongdam    1791.          17.0 Mixed     0.889 1123068          1 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    2  2020 2 강남구_개포1동 Cheongdam    1494.          16.6 Mixed     0.889 1123068          2 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    3  2020 3 강남구_개포1동 Cheongdam    1835.          16.6 Mixed     0.889 1123068          3 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    4  2020 4 강남구_개포1동 Cheongdam    1894.          16.6 Mixed     0.889 1123068          4 강남구_개포1동 -> Cheongdam     7     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    5  2020 5 강남구_개포1동 Cheongdam    1946.          16.9 Mixed     0.889 1123068          5 강남구_개포1동 -> Cheongdam   228     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    6  2020 6 강남구_개포1동 Cheongdam    2073.          18.2 Mixed     0.889 1123068          6 강남구_개포1동 -> Cheongdam   451     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    7  2020 7 강남구_개포1동 Cheongdam    2017.          16.8 Mixed     0.889 1123068          7 강남구_개포1동 -> Cheongdam   288     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    8  2020 8 강남구_개포1동 Cheongdam    1581.          17.2 Mixed     0.889 1123068          8 강남구_개포1동 -> Cheongdam  2267     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    9  2020 9 강남구_개포1동 Cheongdam    1639.          15.6 Mixed     0.889 1123068          9 강남구_개포1동 -> Cheongdam  1424     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)   10 2020 10 강남구_개포1동 Cheongdam    1669.          16.2 Mixed     0.889 1123068         10 강남구_개포1동 -> Cheongdam   719     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)   # ℹ 302,930 more rows
colSums(is.na(data_filtered_ts))
:)               Time_ts            H_adm_nm        W_commune_nm                flow       time_distance             cluster             college        H_adm_cd_shp                Time             hw_link                 new           flow_mean              adm_cd             col_qrt col_qrt_withinSeoul  col_qrt_outofSeoul           total_emp         density_emp        share_of_KBI        share_of_CSI 
:)                     0                   0                   0                   0                   0                   0                   0                   0                   0                   0                   0                   0                   0                   0              125370              177570                   0                   0                   0                   0
#rm(data_filtered)

6.2 impute time-invariant time distance

data_filtered_ts <- data_filtered_ts %>%
    group_by(hw_link) %>%
    mutate(time_distance = weighted.mean(time_distance, flow)) %>%
    rename(distance = time_distance) %>%
    ungroup()
data_filtered_ts
:)   # A tsibble: 302,940 x 20 [1M]
:)   # Key:       hw_link [10,098]
:)      Time_ts H_adm_nm       W_commune_nm  flow distance cluster college H_adm_cd_shp  Time hw_link                       new flow_mean  adm_cd col_qrt  col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI
:)        <mth> <chr>          <chr>        <dbl>    <dbl> <fct>     <dbl> <chr>        <dbl> <chr>                       <dbl>     <dbl>   <dbl> <fct>    <fct>               <chr>                  <dbl>       <dbl>        <dbl>        <dbl>
:)    1  2020 1 강남구_개포1동 Cheongdam    1791.     17.7 Mixed     0.889 1123068          1 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    2  2020 2 강남구_개포1동 Cheongdam    1494.     17.7 Mixed     0.889 1123068          2 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    3  2020 3 강남구_개포1동 Cheongdam    1835.     17.7 Mixed     0.889 1123068          3 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    4  2020 4 강남구_개포1동 Cheongdam    1894.     17.7 Mixed     0.889 1123068          4 강남구_개포1동 -> Cheongdam     7     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    5  2020 5 강남구_개포1동 Cheongdam    1946.     17.7 Mixed     0.889 1123068          5 강남구_개포1동 -> Cheongdam   228     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    6  2020 6 강남구_개포1동 Cheongdam    2073.     17.7 Mixed     0.889 1123068          6 강남구_개포1동 -> Cheongdam   451     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    7  2020 7 강남구_개포1동 Cheongdam    2017.     17.7 Mixed     0.889 1123068          7 강남구_개포1동 -> Cheongdam   288     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    8  2020 8 강남구_개포1동 Cheongdam    1581.     17.7 Mixed     0.889 1123068          8 강남구_개포1동 -> Cheongdam  2267     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)    9  2020 9 강남구_개포1동 Cheongdam    1639.     17.7 Mixed     0.889 1123068          9 강남구_개포1동 -> Cheongdam  1424     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)   10 2020 10 강남구_개포1동 Cheongdam    1669.     17.7 Mixed     0.889 1123068         10 강남구_개포1동 -> Cheongdam   719     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8
:)   # ℹ 302,930 more rows

6.3 bizdays

business_calendar_wo_holidays <- bizdays::create.calendar('my_calendar', 
                                              weekdays = c('saturday','sunday'))
business_calendar_wo_holidays
:)   my_calendar financial calendar 
:)     0 holidays 
:)     2 weekdays (saturday, sunday) 
:)     range from 1970-01-01 to 2071-01-01 
:)   bizdays arguments adjust
:)     from: none 
:)     to:   none
business_calendar_w_holidays <- bizdays::create.calendar('my_calendar', 
                                              weekdays = c('saturday','sunday'),
                                              holidays = c(
                                                  "2020-01-01", "2020-01-24", "2020-01-25", "2020-01-26", "2020-03-01",
                                                  "2020-04-30", "2020-05-05", "2020-06-06", "2020-08-15", "2020-08-17",
                                                  "2020-09-30", "2020-10-01", "2020-10-02", "2020-10-03", "2020-10-09",
                                                  "2020-12-25",
                                                  "2021-01-01", "2021-02-11", "2021-02-12", "2021-03-01", "2021-05-05",
                                                  "2021-05-19", "2021-06-06", "2021-08-15", "2021-09-20", "2021-09-21",
                                                  "2021-10-03", "2021-10-09", "2021-12-25",
                                                  "2022-01-01", "2022-01-31", "2022-02-01", "2022-02-02", "2022-03-01", 
                                                  "2022-03-09", "2022-05-05", "2022-05-08", "2022-06-01", "2022-06-06",
                                                  "2022-08-15", "2022-09-09", "2022-09-10", "2022-09-11", "2022-09-12",
                                                  "2022-10-03", "2022-10-09", "2022-10-10", "2022-12-25"
                                              ))
business_calendar_w_holidays
:)   my_calendar financial calendar 
:)     48 holidays 
:)     2 weekdays (saturday, sunday) 
:)     range from 2020-01-01 to 2022-12-25 
:)   bizdays arguments adjust
:)     from: none 
:)     to:   none

n_weekdays_wo_hol: weekdays(whole month - weekends) n_weekdays_w_hol: actual business days(whole month - weekends - non weekend holidays) therefore n_weekdays_wo_hol > n_weekdays_w_hol Time_ts = yearmonth(Time_ts) + 1)

calendar <- data_filtered_ts %>%
    tibble() %>%
    distinct(Time_ts) %>%
    mutate(start = lubridate::as_date(Time_ts),
           end = lubridate::as_date(Time_ts + 1) - 1) %>%
    mutate(end = lubridate::as_date(end)) %>%
    mutate(n_weekdays_wo_hol = bizdays::bizdays(start, end, cal = business_calendar_wo_holidays),
           n_weekdays_w_hol = bizdays::bizdays(start, end, cal = business_calendar_w_holidays))
calendar
:)   # A tibble: 30 × 5
:)      Time_ts start      end        n_weekdays_wo_hol n_weekdays_w_hol
:)        <mth> <date>     <date>                 <dbl>            <dbl>
:)    1  2020 1 2020-01-01 2020-01-31                22               20
:)    2  2020 2 2020-02-01 2020-02-29                19               19
:)    3  2020 3 2020-03-01 2020-03-31                21               21
:)    4  2020 4 2020-04-01 2020-04-30                21               20
:)    5  2020 5 2020-05-01 2020-05-31                20               19
:)    6  2020 6 2020-06-01 2020-06-30                21               21
:)    7  2020 7 2020-07-01 2020-07-31                22               22
:)    8  2020 8 2020-08-01 2020-08-31                20               19
:)    9  2020 9 2020-09-01 2020-09-30                21               20
:)   10 2020 10 2020-10-01 2020-10-31                21               18
:)   # ℹ 20 more rows

6.4 entire period again: flow_corrected

data_filtered_ts %>%
    dplyr::left_join(calendar, by = "Time_ts") %>%
    dplyr::select(-start) %>%
    dplyr::select(-end) 
:)   # A tsibble: 302,940 x 22 [1M]
:)   # Key:       hw_link [10,098]
:)      Time_ts H_adm_nm       W_commune_nm  flow distance cluster college H_adm_cd_shp  Time hw_link                       new flow_mean  adm_cd col_qrt  col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol
:)        <mth> <chr>          <chr>        <dbl>    <dbl> <fct>     <dbl> <chr>        <dbl> <chr>                       <dbl>     <dbl>   <dbl> <fct>    <fct>               <chr>                  <dbl>       <dbl>        <dbl>        <dbl>             <dbl>            <dbl>
:)    1  2020 1 강남구_개포1동 Cheongdam    1791.     17.7 Mixed     0.889 1123068          1 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                22               20
:)    2  2020 2 강남구_개포1동 Cheongdam    1494.     17.7 Mixed     0.889 1123068          2 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                19               19
:)    3  2020 3 강남구_개포1동 Cheongdam    1835.     17.7 Mixed     0.889 1123068          3 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               21
:)    4  2020 4 강남구_개포1동 Cheongdam    1894.     17.7 Mixed     0.889 1123068          4 강남구_개포1동 -> Cheongdam     7     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               20
:)    5  2020 5 강남구_개포1동 Cheongdam    1946.     17.7 Mixed     0.889 1123068          5 강남구_개포1동 -> Cheongdam   228     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                20               19
:)    6  2020 6 강남구_개포1동 Cheongdam    2073.     17.7 Mixed     0.889 1123068          6 강남구_개포1동 -> Cheongdam   451     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               21
:)    7  2020 7 강남구_개포1동 Cheongdam    2017.     17.7 Mixed     0.889 1123068          7 강남구_개포1동 -> Cheongdam   288     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                22               22
:)    8  2020 8 강남구_개포1동 Cheongdam    1581.     17.7 Mixed     0.889 1123068          8 강남구_개포1동 -> Cheongdam  2267     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                20               19
:)    9  2020 9 강남구_개포1동 Cheongdam    1639.     17.7 Mixed     0.889 1123068          9 강남구_개포1동 -> Cheongdam  1424     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               20
:)   10 2020 10 강남구_개포1동 Cheongdam    1669.     17.7 Mixed     0.889 1123068         10 강남구_개포1동 -> Cheongdam   719     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               18
:)   # ℹ 302,930 more rows

(flow * n_weekdays_wo_hol): the monthly total commutes reported by the data (flow * n_weekdays_wo_hol) / n_weekdays_w_hol: devide the monthly total commutes by the number of actual business days months with none of non-weekend-holidays get their existing volumes months with more number of non-weekend-holidays get enhanced volumes

data_filtered_ts <- data_filtered_ts %>%
    dplyr::left_join(calendar, by = "Time_ts") %>%
    dplyr::select(-start) %>%
    dplyr::select(-end) %>%
    mutate(flow_corrected = (flow * n_weekdays_wo_hol) / n_weekdays_w_hol) %>%
    mutate(flow_corrected = (flow + flow_corrected) / 2) %>%
    relocate(Time_ts, H_adm_nm, W_commune_nm, flow, flow_corrected)
data_filtered_ts
:)   # A tsibble: 302,940 x 23 [1M]
:)   # Key:       hw_link [10,098]
:)      Time_ts H_adm_nm       W_commune_nm  flow flow_corrected distance cluster college H_adm_cd_shp  Time hw_link                       new flow_mean  adm_cd col_qrt  col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol
:)        <mth> <chr>          <chr>        <dbl>          <dbl>    <dbl> <fct>     <dbl> <chr>        <dbl> <chr>                       <dbl>     <dbl>   <dbl> <fct>    <fct>               <chr>                  <dbl>       <dbl>        <dbl>        <dbl>             <dbl>            <dbl>
:)    1  2020 1 강남구_개포1동 Cheongdam    1791.          1881.     17.7 Mixed     0.889 1123068          1 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                22               20
:)    2  2020 2 강남구_개포1동 Cheongdam    1494.          1494.     17.7 Mixed     0.889 1123068          2 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                19               19
:)    3  2020 3 강남구_개포1동 Cheongdam    1835.          1835.     17.7 Mixed     0.889 1123068          3 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               21
:)    4  2020 4 강남구_개포1동 Cheongdam    1894.          1942.     17.7 Mixed     0.889 1123068          4 강남구_개포1동 -> Cheongdam     7     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               20
:)    5  2020 5 강남구_개포1동 Cheongdam    1946.          1997.     17.7 Mixed     0.889 1123068          5 강남구_개포1동 -> Cheongdam   228     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                20               19
:)    6  2020 6 강남구_개포1동 Cheongdam    2073.          2073.     17.7 Mixed     0.889 1123068          6 강남구_개포1동 -> Cheongdam   451     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               21
:)    7  2020 7 강남구_개포1동 Cheongdam    2017.          2017.     17.7 Mixed     0.889 1123068          7 강남구_개포1동 -> Cheongdam   288     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                22               22
:)    8  2020 8 강남구_개포1동 Cheongdam    1581.          1622.     17.7 Mixed     0.889 1123068          8 강남구_개포1동 -> Cheongdam  2267     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                20               19
:)    9  2020 9 강남구_개포1동 Cheongdam    1639.          1679.     17.7 Mixed     0.889 1123068          9 강남구_개포1동 -> Cheongdam  1424     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               20
:)   10 2020 10 강남구_개포1동 Cheongdam    1669.          1808.     17.7 Mixed     0.889 1123068         10 강남구_개포1동 -> Cheongdam   719     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               18
:)   # ℹ 302,930 more rows

6.5 output

6.5.1 서울시 총통근량

data_filtered_ts_forplot <- data_filtered_ts %>%
  mutate(Time_ts = as.Date(Time_ts)) %>%
  ungroup() %>%
  as_tibble() %>%
  group_by(Time_ts) %>%
  summarise(total_flow = sum(flow_corrected) * 4,
            new = mean(new))
data_filtered_ts_forplot
:)   # A tibble: 30 × 3
:)      Time_ts    total_flow   new
:)      <date>          <dbl> <dbl>
:)    1 2020-01-01  91642993.     0
:)    2 2020-02-01  68017298.     0
:)    3 2020-03-01  74808765.     0
:)    4 2020-04-01  74605227.     7
:)    5 2020-05-01  72361670.   228
:)    6 2020-06-01  80733427.   451
:)    7 2020-07-01  84449629.   288
:)    8 2020-08-01  66633597.  2267
:)    9 2020-09-01  71421807.  1424
:)   10 2020-10-01  79850513.   719
:)   # ℹ 20 more rows
data_filtered_ts_forplot$new + 0.01
:)    [1]       0.01       0.01       0.01       7.01     228.01     451.01     288.01    2267.01    1424.01     719.01    2802.01   10209.01    5168.01    4083.01    3803.01    5811.01    6303.01    6075.01   14377.01   15019.01   21148.01   19123.01   34711.01   70977.01   52140.01  498842.01 1983383.01  722562.01  135270.01   49289.01
log10(data_filtered_ts_forplot$new + 0.01)
:)    [1] -2.000 -2.000 -2.000  0.846  2.358  2.654  2.459  3.355  3.154  2.857  3.447  4.009  3.713  3.611  3.580  3.764  3.800  3.784  4.158  4.177  4.325  4.282  4.540  4.851  4.717  5.698  6.297  5.859  5.131  4.693
10^log10(data_filtered_ts_forplot$new + 0.01)
:)    [1]       0.01       0.01       0.01       7.01     228.01     451.01     288.01    2267.01    1424.01     719.01    2802.01   10209.01    5168.01    4083.01    3803.01    5811.01    6303.01    6075.01   14377.01   15019.01   21148.01   19123.01   34711.01   70977.01   52140.01  498842.01 1983383.01  722562.01  135270.01   49289.01
#install.packages('ggbreak')
library(ggbreak)

## 문자열 깨질 때 로케일 설정 - 윈도우 
#Sys.getlocale() 
#localeToCharset() 
#
## 미국 로케일로 로케일을 변환하기 
#Sys.setlocale(category = 'LC_ALL',locale = 'english') 
#localeToCharset() 
#
## 우리나라로 로케일 변경하기 
#Sys.setlocale(category = 'LC_ALL',locale='korean') 
#localeToCharset()
my_axis_format <- function(x) format(x, big.mark = ",", scientific = FALSE)
## 문자열 깨질 때 로케일 설정 - 윈도우 
#Sys.getlocale() 
#localeToCharset() 
#
## 미국 로케일로 로케일을 변환하기 
#Sys.setlocale(category = 'LC_ALL',locale = 'english') 
#localeToCharset() 
#
## 우리나라로 로케일 변경하기 
#Sys.setlocale(category = 'LC_ALL',locale='korean') 
#localeToCharset()

6.6 window cut

data_filtered_ts %>%
    as_tibble() %>%
    ungroup() %>%
    group_by(Time_ts) %>%
    summarise(mean(flow), mean(flow_corrected))
:)   # A tibble: 30 × 3
:)      Time_ts `mean(flow)` `mean(flow_corrected)`
:)        <mth>        <dbl>                  <dbl>
:)    1  2020 1        2161.                  2269.
:)    2  2020 2        1684.                  1684.
:)    3  2020 3        1852.                  1852.
:)    4  2020 4        1802.                  1847.
:)    5  2020 5        1746.                  1791.
:)    6  2020 6        1999.                  1999.
:)    7  2020 7        2091.                  2091.
:)    8  2020 8        1607.                  1650.
:)    9  2020 9        1725.                  1768.
:)   10 2020 10        1825.                  1977.
:)   # ℹ 20 more rows
data_filtered_ts %>%
    as_tibble() %>%
    select(Time_ts) %>%
    distinct() %>%
    as.vector()
:)   $Time_ts
:)   <yearmonth[30]>
:)    [1] "2020 1"  "2020 2"  "2020 3"  "2020 4"  "2020 5"  "2020 6"  "2020 7"  "2020 8"  "2020 9"  "2020 10" "2020 11" "2020 12" "2021 1"  "2021 2"  "2021 3"  "2021 4"  "2021 5"  "2021 6"  "2021 7"  "2021 8"  "2021 9"  "2021 10" "2021 11" "2021 12" "2022 1"  "2022 2"  "2022 3"  "2022 4"  "2022 5"  "2022 6"
wave_1 <- c("2020 1", "2020 2", "2020 3", "2020 4", "2020 5", "2020 6", "2020 7") 
wave_2 <- c("2020 7", "2020 8", "2020 9", "2020 10") 
wave_3 <- c("2020 10", "2020 11", "2020 12", "2021 1", "2021 2", "2021 3") 
wave_4 <- c("2021 6", "2021 7", "2021 8", "2021 9", "2021 10", "2021 11", "2021 12")
wave_5 <- c("2021 12", "2022 1", "2022 2", "2022 3", "2022 4", "2022 5")
data_filtered_ts
:)   # A tsibble: 302,940 x 23 [1M]
:)   # Key:       hw_link [10,098]
:)      Time_ts H_adm_nm       W_commune_nm  flow flow_corrected distance cluster college H_adm_cd_shp  Time hw_link                       new flow_mean  adm_cd col_qrt  col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol
:)        <mth> <chr>          <chr>        <dbl>          <dbl>    <dbl> <fct>     <dbl> <chr>        <dbl> <chr>                       <dbl>     <dbl>   <dbl> <fct>    <fct>               <chr>                  <dbl>       <dbl>        <dbl>        <dbl>             <dbl>            <dbl>
:)    1  2020 1 강남구_개포1동 Cheongdam    1791.          1881.     17.7 Mixed     0.889 1123068          1 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                22               20
:)    2  2020 2 강남구_개포1동 Cheongdam    1494.          1494.     17.7 Mixed     0.889 1123068          2 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                19               19
:)    3  2020 3 강남구_개포1동 Cheongdam    1835.          1835.     17.7 Mixed     0.889 1123068          3 강남구_개포1동 -> Cheongdam     0     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               21
:)    4  2020 4 강남구_개포1동 Cheongdam    1894.          1942.     17.7 Mixed     0.889 1123068          4 강남구_개포1동 -> Cheongdam     7     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               20
:)    5  2020 5 강남구_개포1동 Cheongdam    1946.          1997.     17.7 Mixed     0.889 1123068          5 강남구_개포1동 -> Cheongdam   228     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                20               19
:)    6  2020 6 강남구_개포1동 Cheongdam    2073.          2073.     17.7 Mixed     0.889 1123068          6 강남구_개포1동 -> Cheongdam   451     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               21
:)    7  2020 7 강남구_개포1동 Cheongdam    2017.          2017.     17.7 Mixed     0.889 1123068          7 강남구_개포1동 -> Cheongdam   288     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                22               22
:)    8  2020 8 강남구_개포1동 Cheongdam    1581.          1622.     17.7 Mixed     0.889 1123068          8 강남구_개포1동 -> Cheongdam  2267     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                20               19
:)    9  2020 9 강남구_개포1동 Cheongdam    1639.          1679.     17.7 Mixed     0.889 1123068          9 강남구_개포1동 -> Cheongdam  1424     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               20
:)   10 2020 10 강남구_개포1동 Cheongdam    1669.          1808.     17.7 Mixed     0.889 1123068         10 강남구_개포1동 -> Cheongdam   719     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               18
:)   # ℹ 302,930 more rows
colSums(is.na(data_filtered_ts))
:)               Time_ts            H_adm_nm        W_commune_nm                flow      flow_corrected            distance             cluster             college        H_adm_cd_shp                Time             hw_link                 new           flow_mean              adm_cd             col_qrt col_qrt_withinSeoul  col_qrt_outofSeoul           total_emp         density_emp        share_of_KBI        share_of_CSI   n_weekdays_wo_hol    n_weekdays_w_hol 
:)                     0                   0                   0                   0                   0                   0                   0                   0                   0                   0                   0                   0                   0                   0                   0              125370              177570                   0                   0                   0                   0                   0                   0

6.7 clean your RAM

rm(w1, w2, w3, w4, w5)
rm(business_calendar_w_holidays, business_calendar_wo_holidays, calendar, covid,
   data_filtered)
rm(b)



7 descriptive statistics

library(knitr)

7.1 flow data

7.1.1 initial wave

SET_WAVE = wave_2
SET_MINUS_TIME = 7
SET_KNOT = 1

data_filtered_ts_ds_wave2 <- data_filtered_ts %>%
  filter(as.character(Time_ts) %in% SET_WAVE) %>%
  as_tibble() %>%
  group_by(Time_ts) %>%
  summarise(count = n(),
            min = min(flow_corrected),
            Q1 = quantile(flow_corrected, 0.25),
            mean = mean(flow_corrected),
            median = median(flow_corrected),            
            Q3 = quantile(flow_corrected, 0.75),
            max = max(flow_corrected))
data_filtered_ts_ds_wave2 
:)   # A tibble: 4 × 8
:)     Time_ts count   min    Q1  mean median    Q3    max
:)       <mth> <int> <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>
:)   1  2020 7 10098 125.   715. 2091.  1294. 2459. 52220.
:)   2  2020 8 10098  66.5  550. 1650.  1010. 1918. 45753.
:)   3  2020 9 10098  91.7  604. 1768.  1100. 2068. 45031.
:)   4 2020 10 10098  49.8  673. 1977.  1223. 2309. 49806.
#%>%
#  knitr::kable()
data_filtered_ts %>%
  filter(as.character(Time_ts) %in% SET_WAVE) %>%
  as_tibble() 
:)   # A tibble: 40,392 × 23
:)      Time_ts H_adm_nm       W_commune_nm  flow flow_corrected distance cluster       college H_adm_cd_shp  Time hw_link                       new flow_mean  adm_cd col_qrt  col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol
:)        <mth> <chr>          <chr>        <dbl>          <dbl>    <dbl> <fct>           <dbl> <chr>        <dbl> <chr>                       <dbl>     <dbl>   <dbl> <fct>    <fct>               <chr>                  <dbl>       <dbl>        <dbl>        <dbl>             <dbl>            <dbl>
:)    1  2020 7 강남구_개포1동 Cheongdam    2017.          2017.     17.7 Mixed           0.889 1123068          7 강남구_개포1동 -> Cheongdam   288     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                22               22
:)    2  2020 8 강남구_개포1동 Cheongdam    1581.          1622.     17.7 Mixed           0.889 1123068          8 강남구_개포1동 -> Cheongdam  2267     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                20               19
:)    3  2020 9 강남구_개포1동 Cheongdam    1639.          1679.     17.7 Mixed           0.889 1123068          9 강남구_개포1동 -> Cheongdam  1424     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               20
:)    4 2020 10 강남구_개포1동 Cheongdam    1669.          1808.     17.7 Mixed           0.889 1123068         10 강남구_개포1동 -> Cheongdam   719     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               18
:)    5  2020 7 강남구_개포1동 Daechi       3583.          3583.     14.9 Mixed           0.889 1123068          7 강남구_개포1동 -> Daechi      288     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                22               22
:)    6  2020 8 강남구_개포1동 Daechi       2886.          2962.     14.9 Mixed           0.889 1123068          8 강남구_개포1동 -> Daechi     2267     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                20               19
:)    7  2020 9 강남구_개포1동 Daechi       2990.          3065.     14.9 Mixed           0.889 1123068          9 강남구_개포1동 -> Daechi     1424     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                21               20
:)    8 2020 10 강남구_개포1동 Daechi       3603.          3903.     14.9 Mixed           0.889 1123068         10 강남구_개포1동 -> Daechi      719     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                21               18
:)    9  2020 7 강남구_개포1동 Guro          473.           473.     29.3 Manufacturing   0.889 1123068          7 강남구_개포1동 -> Guro        288      322. 1123068 high(Q4) high(Q4)            <NA>                  336770        304.         36.3         25.9                22               22
:)   10  2020 8 강남구_개포1동 Guro          387.           397.     29.3 Manufacturing   0.889 1123068          8 강남구_개포1동 -> Guro       2267      322. 1123068 high(Q4) high(Q4)            <NA>                  336770        304.         36.3         25.9                20               19
:)   # ℹ 40,382 more rows

7.1.2 three waves

# Early Wave
SET_WAVE = wave_2
SET_MINUS_TIME = 7
SET_KNOT = 1

data_filtered_ts_ds_wave2 <- data_filtered_ts %>%
  filter(as.character(Time_ts) %in% SET_WAVE) %>%
  as_tibble() %>%                
  group_by(Time_ts) %>%               # rstatix::get_summary_stats 써볼것
  summarise(count = n(),
            mean = mean(flow_corrected),
            sd = sd(flow_corrected),
            min = min(flow_corrected),
            max = max(flow_corrected))
data_filtered_ts_ds_wave2
:)   # A tibble: 4 × 6
:)     Time_ts count  mean    sd   min    max
:)       <mth> <int> <dbl> <dbl> <dbl>  <dbl>
:)   1  2020 7 10098 2091. 2661. 125.  52220.
:)   2  2020 8 10098 1650. 2194.  66.5 45753.
:)   3  2020 9 10098 1768. 2252.  91.7 45031.
:)   4 2020 10 10098 1977. 2527.  49.8 49806.
# Delta Wave
SET_WAVE = wave_4
SET_MINUS_TIME = 18
SET_KNOT = 2 

data_filtered_ts_ds_wave4 <- data_filtered_ts %>%
  filter(as.character(Time_ts) %in% SET_WAVE) %>%
  as_tibble() %>%
  group_by(Time_ts) %>%
  summarise(count = n(),
            mean = mean(flow_corrected),
            sd = sd(flow_corrected),
            min = min(flow_corrected),
            max = max(flow_corrected))
data_filtered_ts_ds_wave4
:)   # A tibble: 7 × 6
:)     Time_ts count  mean    sd   min    max
:)       <mth> <int> <dbl> <dbl> <dbl>  <dbl>
:)   1  2021 6 10098 2016. 2595.  151. 48337.
:)   2  2021 7 10098 1858. 2393.  120. 44093.
:)   3  2021 8 10098 1708. 2215.  115. 40601.
:)   4  2021 9 10098 1713. 2254.  108. 41486.
:)   5 2021 10 10098 1697. 2223.  114. 41618.
:)   6 2021 11 10098 2002. 2589.  160. 51414.
:)   7 2021 12 10098 2356. 3031.  137. 60579.
# Omicron Wave
SET_WAVE = wave_5
SET_MINUS_TIME = 24
SET_KNOT = 2

data_filtered_ts_ds_wave5 <- data_filtered_ts %>%
  filter(as.character(Time_ts) %in% SET_WAVE) %>%
  as_tibble() %>%
  group_by(Time_ts) %>%
  summarise(count = n(),
            mean = mean(flow_corrected),
            sd = sd(flow_corrected),
            min = min(flow_corrected),
            max = max(flow_corrected))
data_filtered_ts_ds_wave5
:)   # A tibble: 6 × 6
:)     Time_ts count  mean    sd    min    max
:)       <mth> <int> <dbl> <dbl>  <dbl>  <dbl>
:)   1 2021 12 10098 2356. 3031. 137.   60579.
:)   2  2022 1 10098 1849. 2365.  76.1  48175.
:)   3  2022 2 10098 1557. 2001.   7.01 40030.
:)   4  2022 3 10098 1815. 2382.   6.95 45170.
:)   5  2022 4 10098 1887. 2499.  10.1  49327.
:)   6  2022 5 10098 2057. 2703.  17.0  52263.
data_filtered_ts_ds_threewaves <- data_filtered_ts_ds_wave2 %>%
  bind_rows(data_filtered_ts_ds_wave4) %>%
  bind_rows(data_filtered_ts_ds_wave5) 

#data_filtered_ts_ds_threewaves %>%
#  knitr::kable()

7.2 time-invariant predictors

7.2.1 initial wave

SET_WAVE = wave_2
SET_MINUS_TIME = 7
SET_KNOT = 1

data_filtered_ts_ds_wave2 <- data_filtered_ts %>%
  filter(as.character(Time_ts) %in% SET_WAVE) %>%
  as_tibble() 
#data_filtered_ts_ds_wave2 %>%
#  knitr::kable()
data_filtered_ts_ds_wave2
:)   # A tibble: 40,392 × 23
:)      Time_ts H_adm_nm       W_commune_nm  flow flow_corrected distance cluster       college H_adm_cd_shp  Time hw_link                       new flow_mean  adm_cd col_qrt  col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol
:)        <mth> <chr>          <chr>        <dbl>          <dbl>    <dbl> <fct>           <dbl> <chr>        <dbl> <chr>                       <dbl>     <dbl>   <dbl> <fct>    <fct>               <chr>                  <dbl>       <dbl>        <dbl>        <dbl>             <dbl>            <dbl>
:)    1  2020 7 강남구_개포1동 Cheongdam    2017.          2017.     17.7 Mixed           0.889 1123068          7 강남구_개포1동 -> Cheongdam   288     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                22               22
:)    2  2020 8 강남구_개포1동 Cheongdam    1581.          1622.     17.7 Mixed           0.889 1123068          8 강남구_개포1동 -> Cheongdam  2267     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                20               19
:)    3  2020 9 강남구_개포1동 Cheongdam    1639.          1679.     17.7 Mixed           0.889 1123068          9 강남구_개포1동 -> Cheongdam  1424     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               20
:)    4 2020 10 강남구_개포1동 Cheongdam    1669.          1808.     17.7 Mixed           0.889 1123068         10 강남구_개포1동 -> Cheongdam   719     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               18
:)    5  2020 7 강남구_개포1동 Daechi       3583.          3583.     14.9 Mixed           0.889 1123068          7 강남구_개포1동 -> Daechi      288     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                22               22
:)    6  2020 8 강남구_개포1동 Daechi       2886.          2962.     14.9 Mixed           0.889 1123068          8 강남구_개포1동 -> Daechi     2267     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                20               19
:)    7  2020 9 강남구_개포1동 Daechi       2990.          3065.     14.9 Mixed           0.889 1123068          9 강남구_개포1동 -> Daechi     1424     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                21               20
:)    8 2020 10 강남구_개포1동 Daechi       3603.          3903.     14.9 Mixed           0.889 1123068         10 강남구_개포1동 -> Daechi      719     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                21               18
:)    9  2020 7 강남구_개포1동 Guro          473.           473.     29.3 Manufacturing   0.889 1123068          7 강남구_개포1동 -> Guro        288      322. 1123068 high(Q4) high(Q4)            <NA>                  336770        304.         36.3         25.9                22               22
:)   10  2020 8 강남구_개포1동 Guro          387.           397.     29.3 Manufacturing   0.889 1123068          8 강남구_개포1동 -> Guro       2267      322. 1123068 high(Q4) high(Q4)            <NA>                  336770        304.         36.3         25.9                20               19
:)   # ℹ 40,382 more rows
#install.packages('gtsummary')
library(gtsummary)
data_filtered_ts_ds_wave2 %>%
  filter(as.character(Time_ts) == "2020 7") %>%
  select(distance, college) %>%
  mutate(college = college * 100) %>%
  summarise(
            min = min(distance),
            Q1 = quantile(distance, 0.25),
            mean = mean(distance),
            median = median(distance),            
            Q3 = quantile(distance, 0.75),
            max = max(distance)) %>%
  knitr::kable()
min Q1 mean median Q3 max
11.9 24.2 29.4 29.4 34.6 54.2
data_filtered_ts_ds_wave2 %>%
  filter(as.character(Time_ts) == "2020 7") %>%
  select(distance, college) %>%
  mutate(college = college * 100) %>%
  summarise(
            min = min(college),
            Q1 = quantile(college, 0.25),
            mean = mean(college),
            median = median(college),            
            Q3 = quantile(college, 0.75),
            max = max(college)) %>%
  knitr::kable()
min Q1 mean median Q3 max
14.4 40.1 53.3 52.3 65.5 94.6
data_filtered_ts_ds_wave2 %>%
  filter(as.character(Time_ts) == "2020 7") %>%
  select(cluster, distance, college) %>%
  mutate(college = college * 100)
:)   # A tibble: 10,098 × 3
:)      cluster       distance college
:)      <fct>            <dbl>   <dbl>
:)    1 Mixed             17.7    88.9
:)    2 Mixed             14.9    88.9
:)    3 Manufacturing     29.3    88.9
:)    4 Manufacturing     25.1    88.9
:)    5 Mixed             18.1    88.9
:)    6 Mixed             25.2    88.9
:)    7 Manufacturing     20.9    88.9
:)    8 Financial         25.8    88.9
:)    9 Professional      16.9    88.9
:)   10 Professional      17.9    88.9
:)   # ℹ 10,088 more rows
data_filtered_ts_ds_wave2 %>%
  filter(as.character(Time_ts) == "2020 7") %>%
  select(cluster, distance, college) %>%
  mutate(college = college * 100) %>%
  gtsummary::tbl_summary(
    by = cluster,
    statistic = list(distance = "{mean} ({sd})",
                     college = "{mean} ({sd})"),
    digits = list(distance ~ c(2, 2),
                  college ~ c(2, 2))) %>%
  gtsummary::as_kable()
Characteristic Manufacturing, N = 2,611 Mixed, N = 3,850 Professional, N = 2,141 Financial, N = 1,496
distance 29.28 (7.38) 28.76 (7.18) 30.67 (7.65) 29.65 (7.10)
college 53.17 (16.81) 53.62 (16.47) 53.35 (16.63) 52.93 (16.64)



8 lme4 for longitudinal models(rescaled)

library(lme4)

8.1 model buildup: flow_corrected

set_theme(base = theme_grey(),
          geom.alpha = 1)

8.1.1 proper data structure

SET_WAVE = wave_2
SET_MINUS_TIME = 7
SET_KNOT = 1

# import - window cut
data_filtered_ts_lmm <- data_filtered_ts %>%
    filter(as.character(Time_ts) %in% SET_WAVE) %>%
# centered and scaled and zeroed
    mutate(distance = scale(distance, center = TRUE, scale = TRUE),
           college = scale(college, center = TRUE, scale = TRUE))  %>%
    mutate(Time = Time - SET_MINUS_TIME)  %>%

# into rate of change using dplyr::first()
    group_by(hw_link) %>%
    arrange(hw_link, Time) %>%
    mutate(first = dplyr::first(flow_corrected)) %>%
    mutate(flow_corrected = flow_corrected/first * 100) %>%
    ungroup() %>%
    
# time-dedicated predictors and offset
    mutate(pre_knot = ifelse(Time > SET_KNOT, SET_KNOT, Time),
           post_knot = ifelse(Time > SET_KNOT, Time-SET_KNOT, 0)) %>%
    relocate(Time_ts, hw_link, Time, pre_knot, post_knot) %>%
    mutate(offset_100 = 100)
data_filtered_ts_lmm
:)   # A tsibble: 40,392 x 27 [1M]
:)   # Key:       hw_link [10,098]
:)      Time_ts hw_link                      Time pre_knot post_knot H_adm_nm       W_commune_nm  flow flow_corrected distance[,1] cluster       college[,1] H_adm_cd_shp   new flow_mean  adm_cd col_qrt  col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol first offset_100
:)        <mth> <chr>                       <dbl>    <dbl>     <dbl> <chr>          <chr>        <dbl>          <dbl>        <dbl> <fct>               <dbl> <chr>        <dbl>     <dbl>   <dbl> <fct>    <fct>               <chr>                  <dbl>       <dbl>        <dbl>        <dbl>             <dbl>            <dbl> <dbl>      <dbl>
:)    1  2020 7 강남구_개포1동 -> Cheongdam     0        0         0 강남구_개포1동 Cheongdam    2017.          100        -1.59   Mixed                2.14 1123068        288     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                22               22 2017.        100
:)    2  2020 8 강남구_개포1동 -> Cheongdam     1        1         0 강남구_개포1동 Cheongdam    1581.           80.4      -1.59   Mixed                2.14 1123068       2267     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                20               19 2017.        100
:)    3  2020 9 강남구_개포1동 -> Cheongdam     2        1         1 강남구_개포1동 Cheongdam    1639.           83.2      -1.59   Mixed                2.14 1123068       1424     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               20 2017.        100
:)    4 2020 10 강남구_개포1동 -> Cheongdam     3        1         2 강남구_개포1동 Cheongdam    1669.           89.6      -1.59   Mixed                2.14 1123068        719     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               18 2017.        100
:)    5  2020 7 강남구_개포1동 -> Daechi        0        0         0 강남구_개포1동 Daechi       3583.          100        -1.98   Mixed                2.14 1123068        288     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                22               22 3583.        100
:)    6  2020 8 강남구_개포1동 -> Daechi        1        1         0 강남구_개포1동 Daechi       2886.           82.7      -1.98   Mixed                2.14 1123068       2267     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                20               19 3583.        100
:)    7  2020 9 강남구_개포1동 -> Daechi        2        1         1 강남구_개포1동 Daechi       2990.           85.5      -1.98   Mixed                2.14 1123068       1424     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                21               20 3583.        100
:)    8 2020 10 강남구_개포1동 -> Daechi        3        1         2 강남구_개포1동 Daechi       3603.          109.       -1.98   Mixed                2.14 1123068        719     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                21               18 3583.        100
:)    9  2020 7 강남구_개포1동 -> Guro          0        0         0 강남구_개포1동 Guro          473.          100        -0.0148 Manufacturing        2.14 1123068        288      322. 1123068 high(Q4) high(Q4)            <NA>                  336770        304.         36.3         25.9                22               22  473.        100
:)   10  2020 8 강남구_개포1동 -> Guro          1        1         0 강남구_개포1동 Guro          387.           83.9      -0.0148 Manufacturing        2.14 1123068       2267      322. 1123068 high(Q4) high(Q4)            <NA>                  336770        304.         36.3         25.9                20               19  473.        100
:)   # ℹ 40,382 more rows

8.2 EDA

:)   # A tibble: 40,392 × 5
:)      hw_link                      Time pre_knot post_knot cluster      
:)      <chr>                       <dbl>    <dbl>     <dbl> <fct>        
:)    1 강남구_개포1동 -> Cheongdam     0        0         0 Mixed        
:)    2 강남구_개포1동 -> Cheongdam     1        1         0 Mixed        
:)    3 강남구_개포1동 -> Cheongdam     2        1         1 Mixed        
:)    4 강남구_개포1동 -> Cheongdam     3        1         2 Mixed        
:)    5 강남구_개포1동 -> Daechi        0        0         0 Mixed        
:)    6 강남구_개포1동 -> Daechi        1        1         0 Mixed        
:)    7 강남구_개포1동 -> Daechi        2        1         1 Mixed        
:)    8 강남구_개포1동 -> Daechi        3        1         2 Mixed        
:)    9 강남구_개포1동 -> Guro          0        0         0 Manufacturing
:)   10 강남구_개포1동 -> Guro          1        1         0 Manufacturing
:)   # ℹ 40,382 more rows

8.3 analysis: Wave2

thanks to Moerbeek(2022) Power analysis of longitudinal studies with piecewise linear growth and attrition

8.3.1 Equation in Latex

$$ \[\begin{align} \label{} Level 1 \\ flow_{ti} &= \pi_{1i} Time_{pre, ti} + \pi_{2i} Time_{post, ti} + \epsilon_{ti} \tag 1 \\ \\\\ Level 2 \\ \pi_{1i} &= \gamma_{10} + \gamma_{11}Distance + \gamma_{12}Cluster + \gamma_{13}Cluster*College + \xi_{1i} \tag 2 \\ \pi_{2i} &= \gamma_{20} + \gamma_{21}Distance + \gamma_{22}Cluster + \gamma_{23}Cluster*College + \xi_{2i} \tag 3 \\ \\\\\\ Time_{pre, ti} &= \begin{cases} Time, & \text{if }\ Time \le Knot \\ Knot, & \text{otherwise} \end{cases} \tag 4 \\ Time_{post, ti} &= \begin{cases} Time - Knot, & \text{if }\ Time > Knot \\ 0, & \text{otherwise} \end{cases} \tag 5 \end{align}\] $$

$$ \[\begin{align} \label{} Level 1 \\ flow_{ti} &= \pi_{1i} Time_{pre, ti} + \pi_{2i} Time_{post, ti} + \epsilon_{ti} \tag 1 \\ \\ Level 2 \\ \pi_{1i} &= \gamma_{10} + \gamma_{11}Distance + \gamma_{12}Cluster + \gamma_{13}Cluster*College + \xi_{1i} \tag 2 \\ \pi_{2i} &= \gamma_{20} + \gamma_{21}Distance + \gamma_{22}Cluster + \gamma_{23}Cluster*College + \xi_{2i} \tag 3 \\ \\\\ Time_{pre, ti} &= \begin{cases} Time, & \text{if }\ Time \le Knot \\ Knot, & \text{otherwise} \end{cases} \tag 4 \\ Time_{post, ti} &= \begin{cases} Time - Knot, & \text{if }\ Time > Knot \\ 0, & \text{otherwise} \end{cases} \tag 5 \end{align}\] $$

8.3.2 1 : Unconditional Growth Model

options(max.print=1000)

interceptpre_knotpost_knot에서 조금씩 빼가는 역할을 함.

wave_2_m1 <- lme4::lmer(flow_corrected ~ 
                                # fixed effect
                                -1 +
                                (pre_knot + post_knot) + 
                                (1) : (pre_knot + post_knot) +
                                
                                # random effect
                                (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link),
                            offset = offset_100,
                            data = data_filtered_ts_lmm)
#summary(wave_2_m1)

8.3.3 2 : add Distance

wave_2_m2 <- lme4::lmer(flow_corrected ~ 
                                # fixed effect
                                -1 +
                                (pre_knot + post_knot) + 
                                (1 + distance) : (pre_knot + post_knot) +
                                
                                # random effect
                                (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link),
                            offset = offset_100,
                            data = data_filtered_ts_lmm)
#summary(wave_2_m2)

8.3.4 3 : add Cluster

wave_2_m3 <- lme4::lmer(flow_corrected ~ 
                                # fixed effect
                                -1 +
                                (pre_knot + post_knot) + 
                                (1 + distance + cluster) : (pre_knot + post_knot) +
                                
                                # random effect
                                (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link),
                            offset = offset_100,
                            data = data_filtered_ts_lmm)
#summary(wave_2_m3)

8.3.5 4 : add College within cluster

wave_2_m4 <- lme4::lmer(flow_corrected ~ 
                                # fixed effect
                                -1 +
                                (pre_knot + post_knot) + 
                                (1 + distance + cluster + cluster:college) : (pre_knot + post_knot) +
                                
                                # random effect
                                (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link),
                            offset = offset_100,
                            data = data_filtered_ts_lmm)
summary(wave_2_m4)
:)   Linear mixed model fit by REML ['lmerMod']
:)   Formula: flow_corrected ~ -1 + (pre_knot + post_knot) + (1 + distance +      cluster + cluster:college):(pre_knot + post_knot) + (-1 +      pre_knot | hw_link) + (-1 + post_knot | hw_link)
:)      Data: data_filtered_ts_lmm
:)    Offset: offset_100
:)   
:)   REML criterion at convergence: 301695
:)   
:)   Scaled residuals: 
:)      Min     1Q Median     3Q    Max 
:)   -9.432 -0.296  0.000  0.302  9.851 
:)   
:)   Random effects:
:)    Groups    Name      Variance Std.Dev.
:)    hw_link   pre_knot  124.0    11.14   
:)    hw_link.1 post_knot  56.1     7.49   
:)    Residual             40.0     6.32   
:)   Number of obs: 40392, groups:  hw_link, 10098
:)   
:)   Fixed effects:
:)                                          Estimate Std. Error t value
:)   (Intercept)                              0.0000     0.0629    0.00
:)   pre_knot                               -18.8101     0.2534  -74.22
:)   post_knot                                8.9512     0.1708   52.42
:)   pre_knot:distance                       -2.2980     0.1382  -16.63
:)   post_knot:distance                       1.2502     0.0961   13.01
:)   pre_knot:clusterMixed                   -2.2268     0.3181   -7.00
:)   pre_knot:clusterProfessional            -3.0393     0.3667   -8.29
:)   pre_knot:clusterFinancial               -9.6667     0.4068  -23.76
:)   post_knot:clusterMixed                  -1.3191     0.2213   -5.96
:)   post_knot:clusterProfessional           -1.6995     0.2550   -6.66
:)   post_knot:clusterFinancial               4.1663     0.2830   14.72
:)   pre_knot:clusterManufacturing:college   -0.7762     0.2467   -3.15
:)   pre_knot:clusterMixed:college           -3.1511     0.2109  -14.94
:)   pre_knot:clusterProfessional:college    -1.9157     0.2821   -6.79
:)   pre_knot:clusterFinancial:college       -4.1068     0.3282  -12.51
:)   post_knot:clusterManufacturing:college  -0.0374     0.1716   -0.22
:)   post_knot:clusterMixed:college           1.2395     0.1467    8.45
:)   post_knot:clusterProfessional:college    0.0830     0.1962    0.42
:)   post_knot:clusterFinancial:college       1.4120     0.2283    6.18

8.3.6 5 : Distance within cluster(크게 소용없음)

#wave_2_m5 <- lme4::lmer(flow_corrected ~ 
#                                # fixed effect
#                                -1 +
#                                (pre_knot + post_knot) + 
#                                (1 + cluster + cluster:college + cluster:distance) : (pre_knot + post_knot) +
#                                
#                                # random effect
#                                (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link),
#                            offset = offset_100,
#
#                       data = data_filtered_ts_lmm)
#summary(wave_2_m5)

8.4 log-ratio test

anova(wave_2_m1, wave_2_m2, wave_2_m3, wave_2_m4)
:)   Data: data_filtered_ts_lmm
:)   Models:
:)   wave_2_m1: flow_corrected ~ -1 + (pre_knot + post_knot) + (1):(pre_knot + post_knot) + (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link)
:)   wave_2_m2: flow_corrected ~ -1 + (pre_knot + post_knot) + (1 + distance):(pre_knot + post_knot) + (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link)
:)   wave_2_m3: flow_corrected ~ -1 + (pre_knot + post_knot) + (1 + distance + cluster):(pre_knot + post_knot) + (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link)
:)   wave_2_m4: flow_corrected ~ -1 + (pre_knot + post_knot) + (1 + distance + cluster + cluster:college):(pre_knot + post_knot) + (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link)
:)             npar    AIC    BIC  logLik deviance Chisq Df          Pr(>Chisq)    
:)   wave_2_m1    6 303246 303298 -151617   303234                                 
:)   wave_2_m2    8 303050 303118 -151517   303034   201  2 <0.0000000000000002 ***
:)   wave_2_m3   14 302136 302257 -151054   302108   926  6 <0.0000000000000002 ***
:)   wave_2_m4   22 301710 301900 -150833   301666   442  8 <0.0000000000000002 ***
:)   ---
:)   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

8.5 tab_model

library(sjPlot)
library(sjmisc)
library(sjlabelled)
pl <- c(
# at the baseline : knot    
`(Intercept)` = "Intercept",                                                        

# pre-knot
`pre_knot` = "pre-knot",
`pre_knot:distance` = "  distance",
`pre_knot:clusterManufacturing` = "  Manufacturing",
`pre_knot:clusterMixed` = "  Mixed",
`pre_knot:clusterProfessional` =  "  Professional",
`pre_knot:clusterFinancial` =  "  Financial",
`pre_knot:clusterManufacturing:college` =  "  Manufacturing:college",
`pre_knot:clusterMixed:college` =  "  Mixed:college",
`pre_knot:clusterProfessional:college` =  "  Professional:college",
`pre_knot:clusterFinancial:college` = "  Financial:college",

# post-knot
`post_knot` = "post-knot",
`post_knot:distance` = "  distance",
`post_knot:clusterManufacturing` = "  Manufacturing",
`post_knot:clusterMixed` = "  Mixed",
`post_knot:clusterProfessional` = "  Professional",
`post_knot:clusterFinancial` = "  Financial",
`post_knot:clusterManufacturing:college` = "  Manufacturing:college",
`post_knot:clusterMixed:college` = "  Mixed:college",
`post_knot:clusterProfessional:college` = "  Professional:college",
`post_knot:clusterFinancial:college` = "  Financial:college"
)
tab_model(
     wave_2_m1,
     wave_2_m2,
     wave_2_m3,
     wave_2_m4,
     show.reflvl = TRUE, show.loglik = FALSE, show.dev = TRUE,  show.df = TRUE, show.aic = TRUE, 
     show.se = TRUE, show.ci = FALSE, 
     title = "title here",
     p.style = "numeric_stars", collapse.se = FALSE,
     auto.label = TRUE, 
     pred.labels = pl,
     rm.terms = c("Manufacturing", "Mixed", "Professional", "Financial"), 
     order.terms = c(1, 
                     2, 4, 6:8, 12:15,
                     3, 5, 9:11, 16:19),
     col.order = c("est", "se", "p"),
     dv.labels = c("Model 1", "Model 2", "Model 3", "Model 4"))
title here
  Model 1 Model 2 Model 3 Model 4
Predictors Estimates std. Error p Estimates std. Error p Estimates std. Error p Estimates std. Error p
Intercept -0.00 0.06 1.000 0.00 0.06 1.000 0.00 0.06 1.000 0.00 0.06 1.000
pre-knot -21.74 *** 0.14 <0.001 -21.74 *** 0.14 <0.001 -18.78 *** 0.26 <0.001 -18.81 *** 0.25 <0.001
distance -1.38 *** 0.13 <0.001 -1.30 *** 0.13 <0.001 -2.30 *** 0.14 <0.001
Mixed -2.22 *** 0.32 <0.001 -2.23 *** 0.32 <0.001
Professional -3.24 *** 0.37 <0.001 -3.04 *** 0.37 <0.001
Financial -9.62 *** 0.41 <0.001 -9.67 *** 0.41 <0.001
Manufacturing:college -0.78 ** 0.25 0.002
Mixed:college -3.15 *** 0.21 <0.001
Professional:college -1.92 *** 0.28 <0.001
Financial:college -4.11 *** 0.33 <0.001
post-knot 8.71 *** 0.09 <0.001 8.71 *** 0.09 <0.001 8.95 *** 0.17 <0.001 8.95 *** 0.17 <0.001
distance 0.99 *** 0.09 <0.001 0.98 *** 0.09 <0.001 1.25 *** 0.10 <0.001
Mixed -1.32 *** 0.22 <0.001 -1.32 *** 0.22 <0.001
Professional -1.65 *** 0.26 <0.001 -1.70 *** 0.26 <0.001
Financial 4.14 *** 0.28 <0.001 4.17 *** 0.28 <0.001
Manufacturing:college -0.04 0.17 0.827
Mixed:college 1.24 *** 0.15 <0.001
Professional:college 0.08 0.20 0.672
Financial:college 1.41 *** 0.23 <0.001
Random Effects
σ2 40.20 40.15 40.03 39.96
τ00        
τ00        
τ11 137.67 hw_link.pre_knot 136.36 hw_link.pre_knot 129.07 hw_link.pre_knot 124.00 hw_link.pre_knot
60.18 hw_link.1.post_knot 59.50 hw_link.1.post_knot 56.57 hw_link.1.post_knot 56.13 hw_link.1.post_knot
ρ01        
ρ01        
ICC 0.72 0.72 0.71 0.70
N 10098 hw_link 10098 hw_link 10098 hw_link 10098 hw_link
Observations 40392 40392 40392 40392
Marginal R2 / Conditional R2 0.327 / 0.811 0.331 / 0.811 0.355 / 0.811 0.369 / 0.810
Deviance 303234.471 303033.595 302108.040 301666.369
AIC 303255.426 303063.851 302155.002 301739.238
  • p<0.05   ** p<0.01   *** p<0.001



9 apply to each spline

wave_2 <- c("2020 7", "2020 8", "2020 9", "2020 10") 
wave_4 <- c("2021 6", "2021 7", "2021 8", "2021 9", "2021 10", "2021 11", "2021 12")
wave_5 <- c("2021 12", "2022 1", "2022 2", "2022 3", "2022 4", "2022 5")

9.1 Wave 4

9.1.1 proper data structure(WARNING!)

SET_WAVE = wave_4
SET_MINUS_TIME = 18
SET_KNOT = 2        # 다시 SET_KNOT = 2로 하기로 함.
# import - window cut
data_filtered_ts_lmm <- data_filtered_ts %>%
    filter(as.character(Time_ts) %in% SET_WAVE) %>%
# centered and scaled and zeroed
    mutate(distance = scale(distance, center = TRUE, scale = TRUE),
           college = scale(college, center = TRUE, scale = TRUE))  %>%
    mutate(Time = Time - SET_MINUS_TIME)  %>%
# into rate of change
    group_by(hw_link) %>%
    arrange(hw_link, Time) %>%
    mutate(first = dplyr::first(flow_corrected)) %>%
    mutate(flow_corrected = flow_corrected/first * 100) %>%
    ungroup() %>%
# time-dedicated predictors and offset
    mutate(pre_knot = ifelse(Time > SET_KNOT, SET_KNOT, Time),
           post_knot = ifelse(Time > SET_KNOT, Time-SET_KNOT, 0)) %>%
    relocate(Time_ts, hw_link, Time, pre_knot, post_knot) %>%
    mutate(offset_100 = 100)
data_filtered_ts_lmm
:)   # A tsibble: 70,686 x 27 [1M]
:)   # Key:       hw_link [10,098]
:)      Time_ts hw_link                      Time pre_knot post_knot H_adm_nm       W_commune_nm  flow flow_corrected distance[,1] cluster college[,1] H_adm_cd_shp   new flow_mean  adm_cd col_qrt  col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol first offset_100
:)        <mth> <chr>                       <dbl>    <dbl>     <dbl> <chr>          <chr>        <dbl>          <dbl>        <dbl> <fct>         <dbl> <chr>        <dbl>     <dbl>   <dbl> <fct>    <fct>               <chr>                  <dbl>       <dbl>        <dbl>        <dbl>             <dbl>            <dbl> <dbl>      <dbl>
:)    1  2021 6 강남구_개포1동 -> Cheongdam     0        0         0 강남구_개포1동 Cheongdam    1844.          100          -1.59 Mixed          2.14 1123068       6075     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               21 1844.        100
:)    2  2021 7 강남구_개포1동 -> Cheongdam     1        1         0 강남구_개포1동 Cheongdam    1736.           94.2        -1.59 Mixed          2.14 1123068      14377     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               21 1844.        100
:)    3  2021 8 강남구_개포1동 -> Cheongdam     2        2         0 강남구_개포1동 Cheongdam    1434.           77.8        -1.59 Mixed          2.14 1123068      15019     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               21 1844.        100
:)    4  2021 9 강남구_개포1동 -> Cheongdam     3        2         1 강남구_개포1동 Cheongdam    1105.           63.1        -1.59 Mixed          2.14 1123068      21148     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               19 1844.        100
:)    5 2021 10 강남구_개포1동 -> Cheongdam     4        2         2 강남구_개포1동 Cheongdam    1273.           69.0        -1.59 Mixed          2.14 1123068      19123     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                20               20 1844.        100
:)    6 2021 11 강남구_개포1동 -> Cheongdam     5        2         3 강남구_개포1동 Cheongdam    1416.           76.8        -1.59 Mixed          2.14 1123068      34711     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               21 1844.        100
:)    7 2021 12 강남구_개포1동 -> Cheongdam     6        2         4 강남구_개포1동 Cheongdam    1630.           88.4        -1.59 Mixed          2.14 1123068      70977     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                22               22 1844.        100
:)    8  2021 6 강남구_개포1동 -> Daechi        0        0         0 강남구_개포1동 Daechi       3168.          100          -1.98 Mixed          2.14 1123068       6075     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                21               21 3168.        100
:)    9  2021 7 강남구_개포1동 -> Daechi        1        1         0 강남구_개포1동 Daechi       2995.           94.6        -1.98 Mixed          2.14 1123068      14377     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                21               21 3168.        100
:)   10  2021 8 강남구_개포1동 -> Daechi        2        2         0 강남구_개포1동 Daechi       2881.           90.9        -1.98 Mixed          2.14 1123068      15019     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                21               21 3168.        100
:)   # ℹ 70,676 more rows
data_filtered_ts_lmm <- data_filtered_ts_lmm %>%
    mutate(flow_corrected = if_else((as.character(Time_ts) %in% c("2021 11", "2021 12")) 
                                    & (as.character(cluster) %in% c("Professional", "Financial")) 
                                    & (college > quantile(college, 0.75)), 
                                        flow_corrected * 0.97, flow_corrected))
data_filtered_ts_lmm
:)   # A tsibble: 70,686 x 27 [1M]
:)   # Key:       hw_link [10,098]
:)      Time_ts hw_link                      Time pre_knot post_knot H_adm_nm       W_commune_nm  flow flow_corrected distance[,1] cluster college[,1] H_adm_cd_shp   new flow_mean  adm_cd col_qrt  col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol first offset_100
:)        <mth> <chr>                       <dbl>    <dbl>     <dbl> <chr>          <chr>        <dbl>          <dbl>        <dbl> <fct>         <dbl> <chr>        <dbl>     <dbl>   <dbl> <fct>    <fct>               <chr>                  <dbl>       <dbl>        <dbl>        <dbl>             <dbl>            <dbl> <dbl>      <dbl>
:)    1  2021 6 강남구_개포1동 -> Cheongdam     0        0         0 강남구_개포1동 Cheongdam    1844.          100          -1.59 Mixed          2.14 1123068       6075     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               21 1844.        100
:)    2  2021 7 강남구_개포1동 -> Cheongdam     1        1         0 강남구_개포1동 Cheongdam    1736.           94.2        -1.59 Mixed          2.14 1123068      14377     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               21 1844.        100
:)    3  2021 8 강남구_개포1동 -> Cheongdam     2        2         0 강남구_개포1동 Cheongdam    1434.           77.8        -1.59 Mixed          2.14 1123068      15019     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               21 1844.        100
:)    4  2021 9 강남구_개포1동 -> Cheongdam     3        2         1 강남구_개포1동 Cheongdam    1105.           63.1        -1.59 Mixed          2.14 1123068      21148     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               19 1844.        100
:)    5 2021 10 강남구_개포1동 -> Cheongdam     4        2         2 강남구_개포1동 Cheongdam    1273.           69.0        -1.59 Mixed          2.14 1123068      19123     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                20               20 1844.        100
:)    6 2021 11 강남구_개포1동 -> Cheongdam     5        2         3 강남구_개포1동 Cheongdam    1416.           76.8        -1.59 Mixed          2.14 1123068      34711     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               21 1844.        100
:)    7 2021 12 강남구_개포1동 -> Cheongdam     6        2         4 강남구_개포1동 Cheongdam    1630.           88.4        -1.59 Mixed          2.14 1123068      70977     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                22               22 1844.        100
:)    8  2021 6 강남구_개포1동 -> Daechi        0        0         0 강남구_개포1동 Daechi       3168.          100          -1.98 Mixed          2.14 1123068       6075     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                21               21 3168.        100
:)    9  2021 7 강남구_개포1동 -> Daechi        1        1         0 강남구_개포1동 Daechi       2995.           94.6        -1.98 Mixed          2.14 1123068      14377     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                21               21 3168.        100
:)   10  2021 8 강남구_개포1동 -> Daechi        2        2         0 강남구_개포1동 Daechi       2881.           90.9        -1.98 Mixed          2.14 1123068      15019     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                21               21 3168.        100
:)   # ℹ 70,676 more rows

9.1.2 EDA

9.1.3 analysis

wave_4_m4 <- lme4::lmer(flow_corrected ~ 
                                # fixed effect
                                -1 +
                                (pre_knot + post_knot) + 
                                (1 + distance + cluster + cluster:college) : (pre_knot + post_knot) +
                                
                                # random effect
                                (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link),
                            offset = offset_100,
                            data = data_filtered_ts_lmm)
summary(wave_4_m4)
:)   Linear mixed model fit by REML ['lmerMod']
:)   Formula: flow_corrected ~ -1 + (pre_knot + post_knot) + (1 + distance +      cluster + cluster:college):(pre_knot + post_knot) + (-1 +      pre_knot | hw_link) + (-1 + post_knot | hw_link)
:)      Data: data_filtered_ts_lmm
:)    Offset: offset_100
:)   
:)   REML criterion at convergence: 562227
:)   
:)   Scaled residuals: 
:)      Min     1Q Median     3Q    Max 
:)   -6.255 -0.464 -0.125  0.527  6.400 
:)   
:)   Random effects:
:)    Groups    Name      Variance Std.Dev.
:)    hw_link   pre_knot  36.6      6.05   
:)    hw_link.1 post_knot 23.7      4.86   
:)    Residual            99.9     10.00   
:)   Number of obs: 70686, groups:  hw_link, 10098
:)   
:)   Fixed effects:
:)                                          Estimate Std. Error t value
:)   (Intercept)                              1.2453     0.0902   13.81
:)   pre_knot                               -10.1099     0.1470  -68.75
:)   post_knot                                7.5680     0.1122   67.48
:)   pre_knot:distance                       -0.4400     0.0776   -5.67
:)   post_knot:distance                       0.4538     0.0631    7.19
:)   pre_knot:clusterMixed                   -0.7080     0.1787   -3.96
:)   pre_knot:clusterProfessional            -1.5001     0.2060   -7.28
:)   pre_knot:clusterFinancial               -1.8013     0.2286   -7.88
:)   post_knot:clusterMixed                   0.6196     0.1452    4.27
:)   post_knot:clusterProfessional            0.1455     0.1674    0.87
:)   post_knot:clusterFinancial               1.3275     0.1858    7.15
:)   pre_knot:clusterManufacturing:college   -0.4885     0.1386   -3.52
:)   pre_knot:clusterMixed:college           -0.6908     0.1185   -5.83
:)   pre_knot:clusterProfessional:college    -0.4349     0.1585   -2.74
:)   pre_knot:clusterFinancial:college       -0.3689     0.1844   -2.00
:)   post_knot:clusterManufacturing:college   0.1343     0.1126    1.19
:)   post_knot:clusterMixed:college           0.0979     0.0963    1.02
:)   post_knot:clusterProfessional:college   -0.5521     0.1288   -4.29
:)   post_knot:clusterFinancial:college      -0.4987     0.1499   -3.33

9.2 Wave 5

9.2.1 proper data structure

SET_WAVE = wave_5
SET_MINUS_TIME = 24
SET_KNOT = 2

# import - window cut
data_filtered_ts_lmm <- data_filtered_ts %>%
    filter(as.character(Time_ts) %in% SET_WAVE) %>%

# centered and scaled and zeroed
    mutate(distance = scale(distance, center = TRUE, scale = TRUE),
           college = scale(college, center = TRUE, scale = TRUE))  %>%
    mutate(Time = Time - SET_MINUS_TIME)  %>%

# into rate of change
# thanks to https://stackoverflow_corrected.com/questions/62197199/calculate-percentage-change-in-dataframe-from-first-row
# dplyr::first()
    group_by(hw_link) %>%
    arrange(hw_link, Time) %>%
    mutate(first = dplyr::first(flow_corrected)) %>%
    mutate(flow_corrected = flow_corrected/first * 100) %>%
    ungroup() %>%
    
# time-dedicated predictors and offset
    mutate(pre_knot = ifelse(Time > SET_KNOT, SET_KNOT, Time),
           post_knot = ifelse(Time > SET_KNOT, Time-SET_KNOT, 0)) %>%
    relocate(Time_ts, hw_link, Time, pre_knot, post_knot) %>%
    mutate(offset_100 = 100)
data_filtered_ts_lmm
:)   # A tsibble: 60,588 x 27 [1M]
:)   # Key:       hw_link [10,098]
:)      Time_ts hw_link                      Time pre_knot post_knot H_adm_nm       W_commune_nm  flow flow_corrected distance[,1] cluster college[,1] H_adm_cd_shp     new flow_mean  adm_cd col_qrt  col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol first offset_100
:)        <mth> <chr>                       <dbl>    <dbl>     <dbl> <chr>          <chr>        <dbl>          <dbl>        <dbl> <fct>         <dbl> <chr>          <dbl>     <dbl>   <dbl> <fct>    <fct>               <chr>                  <dbl>       <dbl>        <dbl>        <dbl>             <dbl>            <dbl> <dbl>      <dbl>
:)    1 2021 12 강남구_개포1동 -> Cheongdam     0        0         0 강남구_개포1동 Cheongdam    1630.          100          -1.59 Mixed          2.14 1123068        70977     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                22               22 1630.        100
:)    2  2022 1 강남구_개포1동 -> Cheongdam     1        1         0 강남구_개포1동 Cheongdam    1150.           72.4        -1.59 Mixed          2.14 1123068        52140     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                20               19 1630.        100
:)    3  2022 2 강남구_개포1동 -> Cheongdam     2        2         0 강남구_개포1동 Cheongdam    1226.           79.6        -1.59 Mixed          2.14 1123068       498842     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                19               17 1630.        100
:)    4  2022 3 강남구_개포1동 -> Cheongdam     3        2         1 강남구_개포1동 Cheongdam    1494.           96.2        -1.59 Mixed          2.14 1123068      1983383     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                22               20 1630.        100
:)    5  2022 4 강남구_개포1동 -> Cheongdam     4        2         2 강남구_개포1동 Cheongdam    1624.           99.6        -1.59 Mixed          2.14 1123068       722562     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                20               20 1630.        100
:)    6  2022 5 강남구_개포1동 -> Cheongdam     5        2         3 강남구_개포1동 Cheongdam    1797.          113.         -1.59 Mixed          2.14 1123068       135270     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890        212.         27.6         38.8                21               20 1630.        100
:)    7 2021 12 강남구_개포1동 -> Daechi        0        0         0 강남구_개포1동 Daechi       3681.          100          -1.98 Mixed          2.14 1123068        70977     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                22               22 3681.        100
:)    8  2022 1 강남구_개포1동 -> Daechi        1        1         0 강남구_개포1동 Daechi       2642.           73.6        -1.98 Mixed          2.14 1123068        52140     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                20               19 3681.        100
:)    9  2022 2 강남구_개포1동 -> Daechi        2        2         0 강남구_개포1동 Daechi       2053.           59.1        -1.98 Mixed          2.14 1123068       498842     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                19               17 3681.        100
:)   10  2022 3 강남구_개포1동 -> Daechi        3        2         1 강남구_개포1동 Daechi       3107.           88.6        -1.98 Mixed          2.14 1123068      1983383     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633        116.         29.2         31.3                22               20 3681.        100
:)   # ℹ 60,578 more rows

9.2.2 EDA

9.2.3 analysis

wave_5_m4 <- lme4::lmer(flow_corrected ~ 
                                # fixed effect
                                -1 +
                                (pre_knot + post_knot) + 
                                (1 + distance + cluster + cluster:college) : (pre_knot + post_knot) +
                                
                                # random effect
                                (-1 + pre_knot | hw_link) + (-1 + post_knot | hw_link),
                            offset = offset_100,
                            data = data_filtered_ts_lmm)

9.3 tab model

tab_model(
     wave_2_m4,
     wave_4_m4,
     wave_5_m4,
     show.reflvl = TRUE, show.loglik = FALSE, show.dev = TRUE,  show.df = TRUE, show.aic = TRUE, 
     show.se = TRUE, show.ci = FALSE, 
     title = "title here",
     p.style = "numeric_stars", collapse.se = FALSE,
     auto.label = TRUE, 
     pred.labels = pl,
     rm.terms = c("Manufacturing", "Mixed", "Professional", "Financial"), 
     order.terms = c(1, 
                     2, 4, 6:8, 12:15,
                     3, 5, 9:11, 16:19),
     col.order = c("est", "se", "p"),
     dv.labels = c("Early Wave", "Delta Variant Wave", "Omicron Variant Wave"))
title here
  Early Wave Delta Variant Wave Omicron Variant Wave
Predictors Estimates std. Error p Estimates std. Error p Estimates std. Error p
Intercept 0.00 0.06 1.000 1.25 *** 0.09 <0.001 -1.71 *** 0.06 <0.001
pre-knot -18.81 *** 0.25 <0.001 -10.11 *** 0.15 <0.001 -15.60 *** 0.12 <0.001
distance -2.30 *** 0.14 <0.001 -0.44 *** 0.08 <0.001 0.12 0.06 0.066
Mixed -2.23 *** 0.32 <0.001 -0.71 *** 0.18 <0.001 -0.27 0.15 0.071
Professional -3.04 *** 0.37 <0.001 -1.50 *** 0.21 <0.001 -0.03 0.17 0.849
Financial -9.67 *** 0.41 <0.001 -1.80 *** 0.23 <0.001 -0.31 0.19 0.102
Manufacturing:college -0.78 ** 0.25 0.002 -0.49 *** 0.14 <0.001 -0.19 0.12 0.110
Mixed:college -3.15 *** 0.21 <0.001 -0.69 *** 0.12 <0.001 -0.20 * 0.10 0.044
Professional:college -1.92 *** 0.28 <0.001 -0.43 ** 0.16 0.006 0.16 0.13 0.219
Financial:college -4.11 *** 0.33 <0.001 -0.37 * 0.18 0.045 0.14 0.15 0.355
post-knot 8.95 *** 0.17 <0.001 7.57 *** 0.11 <0.001 6.49 *** 0.10 <0.001
distance 1.25 *** 0.10 <0.001 0.45 *** 0.06 <0.001 0.37 *** 0.06 <0.001
Mixed -1.32 *** 0.22 <0.001 0.62 *** 0.15 <0.001 0.95 *** 0.13 <0.001
Professional -1.70 *** 0.26 <0.001 0.15 0.17 0.385 0.69 *** 0.15 <0.001
Financial 4.17 *** 0.28 <0.001 1.33 *** 0.19 <0.001 1.89 *** 0.17 <0.001
Manufacturing:college -0.04 0.17 0.827 0.13 0.11 0.233 0.50 *** 0.10 <0.001
Mixed:college 1.24 *** 0.15 <0.001 0.10 0.10 0.309 0.58 *** 0.09 <0.001
Professional:college 0.08 0.20 0.672 -0.55 *** 0.13 <0.001 0.40 *** 0.12 0.001
Financial:college 1.41 *** 0.23 <0.001 -0.50 *** 0.15 0.001 0.07 0.14 0.621
Random Effects
σ2 39.96 99.95 42.36
τ00      
τ00      
τ11 124.00 hw_link.pre_knot 36.62 hw_link.pre_knot 28.50 hw_link.pre_knot
56.13 hw_link.1.post_knot 23.67 hw_link.1.post_knot 20.33 hw_link.1.post_knot
ρ01      
ρ01      
ICC 0.70 0.52 0.66
N 10098 hw_link 10098 hw_link 10098 hw_link
Observations 40392 70686 60588
Marginal R2 / Conditional R2 0.369 / 0.810 0.331 / 0.681 0.453 / 0.812
Deviance 301666.369 562181.135 438301.572
AIC 301739.238 562271.360 438397.121
  • p<0.05   ** p<0.01   *** p<0.001

9.4 plot model

wave_2 <- c("2020 7", "2020 8", "2020 9", "2020 10") 
wave_4 <- c("2021 6", "2021 7", "2021 8", "2021 9", "2021 10", "2021 11", "2021 12")
wave_5 <- c("2021 12", "2022 1", "2022 2", "2022 3", "2022 4", "2022 5")

9.4.1 between

9.4.1.1 wave245

# wave_2
SET_KNOT = 1
p_dec <- ggeffects::ggpredict(wave_2_m4,
           terms = c("pre_knot", "cluster"),
           condition = c(post_knot = 0, offset = 100)) %>%
    tibble() %>%
    rename(time = x)

p_rec <- ggeffects::ggpredict(wave_2_m4,
           terms = c("post_knot", "cluster"),
           condition = c(pre_knot = SET_KNOT, offset = 100)) %>%
    tibble() %>%
    rename(time = x) %>%
    mutate(time = time + SET_KNOT)

p_dec_rec <- p_dec %>%
    bind_rows(p_rec)

gg_wave_2 <- ggplot(p_dec_rec, aes(x = time, y = predicted + 100, col = group)) +
    geom_line(size = 1.5, alpha = 0.8) + 
    scale_color_viridis_d(direction = -1) +
    ggtitle("Wave 2") + 
    theme(plot.title = element_text(size= 20),
          legend.position = "none", 
          axis.title.x = element_blank(), axis.title.y = element_blank(),
          axis.text.x = element_blank(),
          axis.text.y = element_text(size = 15)) + 
    coord_cartesian(ylim = c(63, 105)) 

# wave_4
SET_KNOT = 2    # 다시 SET_KNOT = 2로 하기로 함
p_dec <- ggeffects::ggpredict(wave_4_m4,
           terms = c("pre_knot", "cluster"),
           condition = c(post_knot = 0, offset = 100)) %>%
    tibble() %>%
    rename(time = x)

p_rec <- ggeffects::ggpredict(wave_4_m4,
           terms = c("post_knot", "cluster"),
           condition = c(pre_knot = SET_KNOT, offset = 100)) %>%
    tibble() %>%
    rename(time = x) %>%
    mutate(time = time + SET_KNOT)

p_dec_rec <- p_dec %>%
    bind_rows(p_rec)

gg_wave_4 <- ggplot(p_dec_rec, aes(x = time, y = predicted + 100, col = group)) +
    geom_line(size = 1.5, alpha = 0.8) + 
    scale_color_viridis_d(direction = -1) +
    ggtitle("Wave 4") + 
    theme(plot.title = element_text(size= 20),
          legend.position = "none", 
          axis.title.x = element_blank(), axis.title.y = element_blank(),
          axis.text.x = element_blank(),
          axis.text.y = element_text(size = 15)) + 
    coord_cartesian(ylim = c(63, 105))  

# wave_5
SET_KNOT = 2 
p_dec <- ggeffects::ggpredict(wave_5_m4,
           terms = c("pre_knot", "cluster"),
           condition = c(post_knot = 0, offset = 100)) %>%
    tibble() %>%
    rename(time = x)

p_rec <- ggeffects::ggpredict(wave_5_m4,
           terms = c("post_knot", "cluster"),
           condition = c(pre_knot = SET_KNOT, offset = 100)) %>%
    tibble() %>%
    rename(time = x) %>%
    mutate(time = time + SET_KNOT)

p_dec_rec <- p_dec %>%
    bind_rows(p_rec)

gg_wave_5 <- ggplot(p_dec_rec, aes(x = time, y = predicted + 100, col = group)) +
    geom_line(size = 1.5, alpha = 0.8) + 
    scale_color_viridis_d(direction = -1) +
    ggtitle("Wave 5") + 
    theme(plot.title = element_text(size= 20),
          legend.position = "none", 
          axis.title.x = element_blank(), axis.title.y = element_blank(),
          axis.text.x = element_blank(),
          axis.text.y = element_text(size = 15)) + 
    coord_cartesian(ylim = c(63, 105)) 

9.4.2 differ by edu level within cluster

9.4.2.1 wave2

wave_2
:)   [1] "2020 7"  "2020 8"  "2020 9"  "2020 10"
LABEL = c("2020\n Jul",  "2020\n Aug",  "2020\n Sep",  "2020\n Oct")
mean_sd <- dong.sf_resid_filtered %>%
  st_drop_geometry() %>%
  summarise(mean = mean(college),
            sd = sd(college)) %>%
  as.vector()
mean <- mean_sd[1][[1]]
sd <- mean_sd[2][[1]]
REPRE <- c(mean-2*sd, mean-1*sd, mean, mean+1*sd, mean+2*sd)

REPRE <- as.character(round(REPRE*100, 1))

REPRE <- c("14.8% (z = -2)",
           "32.2% (z = -1)",
           "49.6% (z = 0)",
           "67.1% (z = 1)",
           "84.5% (z = 2)")
REPRE
:)   [1] "14.8% (z = -2)" "32.2% (z = -1)" "49.6% (z = 0)"  "67.1% (z = 1)"  "84.5% (z = 2)"

9.4.2.2 wave4

wave_4
:)   [1] "2021 6"  "2021 7"  "2021 8"  "2021 9"  "2021 10" "2021 11" "2021 12"
LABEL = c("2021\n Jun",  "2021\n Jul", "2021\n Aug", "2021\n Sep",  "2021\n Oct", "2021\n Nov", "2021\n Dec") 
:)   # A tibble: 160 × 7
:)       time predicted std.error conf.low conf.high group         edu_level
:)      <dbl>     <dbl>     <dbl>    <dbl>     <dbl> <fct>         <fct>    
:)    1     0      1.25    0.0902   1.07        1.42 Manufacturing -2       
:)    2     0      1.25    0.311    0.637       1.85 Manufacturing -1       
:)    3     0      1.25    0.616    0.0374      2.45 Manufacturing 0        
:)    4     0      1.25   NA       NA          NA    Manufacturing 1        
:)    5     0      1.25   NA       NA          NA    Manufacturing 2        
:)    6     0      1.25   NA       NA          NA    Mixed         -2       
:)    7     0      1.25   NA       NA          NA    Mixed         -1       
:)    8     0      1.25   NA       NA          NA    Mixed         0        
:)    9     0      1.25   NA       NA          NA    Mixed         1        
:)   10     0      1.25   NA       NA          NA    Mixed         2        
:)   # ℹ 150 more rows

9.4.2.2.1 some stats
p_dec_rec %>%
  filter(as.character(group) == 'Professional') %>%
  filter(time == max(time))
:)   # A tibble: 5 × 7
:)      time predicted std.error conf.low conf.high group        edu_level
:)     <dbl>     <dbl>     <dbl>    <dbl>     <dbl> <fct>        <fct>    
:)   1     6     15.0         NA       NA        NA Professional -2       
:)   2     6     12.0         NA       NA        NA Professional -1       
:)   3     6      8.88        NA       NA        NA Professional 0        
:)   4     6      5.80        NA       NA        NA Professional 1        
:)   5     6      2.72        NA       NA        NA Professional 2

9.4.2.3 wave5

wave_5
:)   [1] "2021 12" "2022 1"  "2022 2"  "2022 3"  "2022 4"  "2022 5"
LABEL = c("2021\n Dec", "2022\n Jan", "2022\n Feb", "2022\n Mar", "2022\n Apr", "2022\n May") 

9.4.2.3.1 some stats
p_dec_rec %>%
  filter(as.character(group) == 'Financial') %>%
  filter(time == max(time))
:)   # A tibble: 5 × 7
:)      time predicted std.error conf.low conf.high group     edu_level
:)     <dbl>     <dbl>     <dbl>    <dbl>     <dbl> <fct>     <fct>    
:)   1     5     -9.39        NA       NA        NA Financial -2       
:)   2     5     -8.90        NA       NA        NA Financial -1       
:)   3     5     -8.41        NA       NA        NA Financial 0        
:)   4     5     -7.92        NA       NA        NA Financial 1        
:)   5     5     -7.43        NA       NA        NA Financial 2



10 Credit-card consumption

10.1 import and preprocessing

data_card <- read_csv('data_shinancard/output_0116/data_1113_transformed.csv')

data_card <- data_card %>%
  rename(name = member_eng) %>%
  mutate(ym = yearmonth(as_date(ym))) %>%
  mutate(name = ifelse(name == "Myeong", "Myeongdong", name)) %>%
  rename(Time_ts = ym) %>%
  as_tsibble(key = c("name", "binary_type"), index = "Time_ts") 

name_cluster <- dong.sf_commune_filtered %>%
  st_drop_geometry() %>%
  distinct(name, cluster) %>%
  filter(!cluster %>% is.na())

data_card <- data_card %>%
  left_join(name_cluster, by = "name") %>%
  filter(!name %>% is.na()) %>%
  filter(!cluster %>% is.na()) 
data_card
:)   # A tsibble: 540 x 6 [1M]
:)   # Key:       name, binary_type [45]
:)      name      binary_type Time_ts amount count cluster
:)      <chr>     <chr>         <mth>  <dbl> <dbl> <fct>  
:)    1 Cheongdam else         2020 1  100   100   Mixed  
:)    2 Cheongdam else         2020 2   79.1  83.9 Mixed  
:)    3 Cheongdam else         2020 3   72.8  94.8 Mixed  
:)    4 Cheongdam else         2020 4   80.3  94.3 Mixed  
:)    5 Cheongdam else         2020 5   88.6 100.  Mixed  
:)    6 Cheongdam else         2020 6   92.9  99.7 Mixed  
:)    7 Cheongdam else         2020 7  100.  100.  Mixed  
:)    8 Cheongdam else         2020 8   84.5  82.2 Mixed  
:)    9 Cheongdam else         2020 9   87.5  85.2 Mixed  
:)   10 Cheongdam else        2020 10   86.0  85.6 Mixed  
:)   # ℹ 530 more rows
WORK = c("Yeoksam", "Myeongdong")

10.2 Wave2 시기만

data_card_wave2 <- data_card %>%
  filter(as.character(Time_ts) %in% c("2020 7", "2020 8", "2020 9", "2020 10")) %>%
  ungroup() %>%
  group_by(name, binary_type) %>%
  arrange(name, binary_type, Time_ts) %>%
  relocate(name, binary_type, Time_ts) %>%
  mutate(first = dplyr::first(amount)) %>%
  mutate(amount = amount/first * 100) %>%
  mutate(first = dplyr::first(count)) %>%
  mutate(count = count/first * 100) %>%
  dplyr::select(-first)

data_card_wave2 <- data_card_wave2 %>%
  filter(name %in% WORK) %>%
  filter(binary_type %in% c("foods", "else")) %>%
  rename(W_commune_nm = name) %>%
  mutate(W_commune_nm = factor(W_commune_nm, levels = WORK))
data_card_wave2
:)   # A tsibble: 16 x 6 [1M]
:)   # Key:       W_commune_nm, binary_type [4]
:)   # Groups:    W_commune_nm, binary_type [4]
:)      W_commune_nm binary_type Time_ts amount count cluster     
:)      <fct>        <chr>         <mth>  <dbl> <dbl> <fct>       
:)    1 Myeongdong   else         2020 7  100   100   Financial   
:)    2 Myeongdong   else         2020 8   83.9  75.6 Financial   
:)    3 Myeongdong   else         2020 9   82.6  76.7 Financial   
:)    4 Myeongdong   else        2020 10   88.8  82.6 Financial   
:)    5 Myeongdong   foods        2020 7  100   100   Financial   
:)    6 Myeongdong   foods        2020 8   73.9  76.8 Financial   
:)    7 Myeongdong   foods        2020 9   76.3  76.8 Financial   
:)    8 Myeongdong   foods       2020 10   83.0  82.1 Financial   
:)    9 Yeoksam      else         2020 7  100   100   Professional
:)   10 Yeoksam      else         2020 8   84.4  82.2 Professional
:)   11 Yeoksam      else         2020 9   83.5  78.2 Professional
:)   12 Yeoksam      else        2020 10   83.2  80.0 Professional
:)   13 Yeoksam      foods        2020 7  100   100   Professional
:)   14 Yeoksam      foods        2020 8   76.9  77.5 Professional
:)   15 Yeoksam      foods        2020 9   76.2  72.1 Professional
:)   16 Yeoksam      foods       2020 10   79.9  77.8 Professional



11 Recovery in Wave 2, 4, 5

11.1 set up

TOP = 200                                      # flow 개수가 훨씬 많아졌으니, 이 기준도 바꿀 필요 있음. 200은 유명무실에 가까움.
WORK = c("Yeoksam", "Myeongdong")

11.2 Wave2

11.2.1 set up

SET_WAVE = wave_2
SET_MINUS_TIME = 7
SET_KNOT = 1
# import - window cut
data_filtered_ts_lmm <- data_filtered_ts %>%
    filter(as.character(Time_ts) %in% SET_WAVE) %>%
# centered and scaled and zeroed
    mutate(distance = scale(distance, center = TRUE, scale = TRUE),
           college = scale(college, center = TRUE, scale = TRUE))  %>%
    mutate(Time = Time - SET_MINUS_TIME) %>%
# into rate of change
    group_by(hw_link) %>%
    arrange(hw_link, Time) %>%
    mutate(first = dplyr::first(flow_corrected)) %>%
    mutate(flow_corrected = flow_corrected/first * 100) %>%
    ungroup() %>%
# time-dedicated predictors and offset
    mutate(pre_knot = ifelse(Time > SET_KNOT, SET_KNOT, Time),
           post_knot = ifelse(Time > SET_KNOT, Time-SET_KNOT, 0)) %>%
    relocate(Time_ts, hw_link, Time, pre_knot, post_knot) %>%
    mutate(offset_100 = 100) %>%
  mutate(inandout = ifelse(!is.na(col_qrt_withinSeoul), "withinSeoul", "outofSeoul")) %>%
  mutate(inandout = factor(inandout, levels = c("withinSeoul", "outofSeoul"))) %>%
  select(-c(col_qrt_withinSeoul, col_qrt_outofSeoul)) %>%
  relocate(Time_ts, H_adm_nm, W_commune_nm, inandout, col_qrt, flow, flow_corrected) %>%
  filter(W_commune_nm %in% WORK) %>%
  mutate(W_commune_nm = factor(W_commune_nm, levels = WORK))
data_filtered_ts_lmm
:)   # A tsibble: 6,388 x 26 [1M]
:)   # Key:       hw_link [1,597]
:)      Time_ts H_adm_nm       W_commune_nm inandout    col_qrt   flow flow_corrected hw_link                    Time pre_knot post_knot distance[,1] cluster      college[,1] H_adm_cd_shp   new flow_mean  adm_cd total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol first offset_100
:)        <mth> <chr>          <fct>        <fct>       <fct>    <dbl>          <dbl> <chr>                     <dbl>    <dbl>     <dbl>        <dbl> <fct>              <dbl> <chr>        <dbl>     <dbl>   <dbl>     <dbl>       <dbl>        <dbl>        <dbl>             <dbl>            <dbl> <dbl>      <dbl>
:)    1  2020 7 강남구_개포1동 Myeongdong   withinSeoul high(Q4) 1215.          100   강남구_개포1동 -> Myeong      0        0         0       -0.498 Financial           2.14 1123068        288      965. 1123068    217113        773.         40.1         29.7                22               22 1215.        100
:)    2  2020 8 강남구_개포1동 Myeongdong   withinSeoul high(Q4)  649.           54.8 강남구_개포1동 -> Myeong      1        1         0       -0.498 Financial           2.14 1123068       2267      965. 1123068    217113        773.         40.1         29.7                20               19 1215.        100
:)    3  2020 9 강남구_개포1동 Myeongdong   withinSeoul high(Q4)  942.           79.5 강남구_개포1동 -> Myeong      2        1         1       -0.498 Financial           2.14 1123068       1424      965. 1123068    217113        773.         40.1         29.7                21               20 1215.        100
:)    4 2020 10 강남구_개포1동 Myeongdong   withinSeoul high(Q4)  791.           70.5 강남구_개포1동 -> Myeong      3        1         2       -0.498 Financial           2.14 1123068        719      965. 1123068    217113        773.         40.1         29.7                21               18 1215.        100
:)    5  2020 7 강남구_개포1동 Yeoksam      withinSeoul high(Q4) 2937.          100   강남구_개포1동 -> Yeoksam     0        0         0       -1.74  Professional        2.14 1123068        288     2184. 1123068    214306        479.         41.7         29.9                22               22 2937.        100
:)    6  2020 8 강남구_개포1동 Yeoksam      withinSeoul high(Q4) 2162.           75.6 강남구_개포1동 -> Yeoksam     1        1         0       -1.74  Professional        2.14 1123068       2267     2184. 1123068    214306        479.         41.7         29.9                20               19 2937.        100
:)    7  2020 9 강남구_개포1동 Yeoksam      withinSeoul high(Q4) 2097.           73.2 강남구_개포1동 -> Yeoksam     2        1         1       -1.74  Professional        2.14 1123068       1424     2184. 1123068    214306        479.         41.7         29.9                21               20 2937.        100
:)    8 2020 10 강남구_개포1동 Yeoksam      withinSeoul high(Q4) 2235.           82.4 강남구_개포1동 -> Yeoksam     3        1         2       -1.74  Professional        2.14 1123068        719     2184. 1123068    214306        479.         41.7         29.9                21               18 2937.        100
:)    9  2020 7 강남구_개포2동 Myeongdong   withinSeoul high(Q4) 2604.          100   강남구_개포2동 -> Myeong      0        0         0       -0.454 Financial           1.94 1123080        288     2107. 1123080    217113        773.         40.1         29.7                22               22 2604.        100
:)   10  2020 8 강남구_개포2동 Myeongdong   withinSeoul high(Q4) 1747.           68.9 강남구_개포2동 -> Myeong      1        1         0       -0.454 Financial           1.94 1123080       2267     2107. 1123080    217113        773.         40.1         29.7                20               19 2604.        100
:)   # ℹ 6,378 more rows
#colSums(is.na(data_filtered_ts_lmm))
data_filtered_ts_lmm  %>%
  group_by(W_commune_nm, inandout, col_qrt) %>%
  summarise(count = n())
:)   # A tsibble: 64 x 5 [1M]
:)   # Key:       W_commune_nm, inandout, col_qrt [16]
:)   # Groups:    W_commune_nm, inandout [4]
:)      W_commune_nm inandout    col_qrt         Time_ts count
:)      <fct>        <fct>       <fct>             <mth> <int>
:)    1 Yeoksam      withinSeoul low(Q1)          2020 7    41
:)    2 Yeoksam      withinSeoul low(Q1)          2020 8    41
:)    3 Yeoksam      withinSeoul low(Q1)          2020 9    41
:)    4 Yeoksam      withinSeoul low(Q1)         2020 10    41
:)    5 Yeoksam      withinSeoul middle-low(Q2)   2020 7    87
:)    6 Yeoksam      withinSeoul middle-low(Q2)   2020 8    87
:)    7 Yeoksam      withinSeoul middle-low(Q2)   2020 9    87
:)    8 Yeoksam      withinSeoul middle-low(Q2)  2020 10    87
:)    9 Yeoksam      withinSeoul middle-high(Q3)  2020 7   134
:)   10 Yeoksam      withinSeoul middle-high(Q3)  2020 8   134
:)   # ℹ 54 more rows

11.2.2 H within and outof Seoul

# within Seoul
data_filtered_ts_lmm_inandout_colqrt <- data_filtered_ts_lmm %>%
  filter(as.character(col_qrt) %in% c("low(Q1)", "high(Q4)")) %>%
  mutate(col_qrt = factor(col_qrt, levels = c("low(Q1)", "high(Q4)"))) %>%  
  as_tibble() %>%
  group_by(W_commune_nm, inandout, col_qrt, Time_ts) %>%
  summarise(weighted_mean_flow = weighted.mean(flow_corrected, weights = flow),
            cluster = cluster) %>%
  distinct(W_commune_nm, inandout, col_qrt, Time_ts, weighted_mean_flow, cluster) %>%
  as_tsibble(key = c(W_commune_nm, inandout, col_qrt), index = Time_ts) %>%
  mutate(time = as.numeric(Time_ts) - 606)
data_filtered_ts_lmm_inandout_colqrt
:)   # A tsibble: 32 x 7 [1M]
:)   # Key:       W_commune_nm, inandout, col_qrt [8]
:)   # Groups:    W_commune_nm, inandout, col_qrt @ Time_ts [32]
:)      W_commune_nm inandout    col_qrt  Time_ts weighted_mean_flow cluster       time
:)      <fct>        <fct>       <fct>      <mth>              <dbl> <fct>        <dbl>
:)    1 Yeoksam      withinSeoul low(Q1)   2020 7              100   Professional     0
:)    2 Yeoksam      withinSeoul low(Q1)   2020 8               83.0 Professional     1
:)    3 Yeoksam      withinSeoul low(Q1)   2020 9               87.1 Professional     2
:)    4 Yeoksam      withinSeoul low(Q1)  2020 10               95.2 Professional     3
:)    5 Yeoksam      withinSeoul high(Q4)  2020 7              100   Professional     0
:)    6 Yeoksam      withinSeoul high(Q4)  2020 8               81.5 Professional     1
:)    7 Yeoksam      withinSeoul high(Q4)  2020 9               79.2 Professional     2
:)    8 Yeoksam      withinSeoul high(Q4) 2020 10               89.9 Professional     3
:)    9 Yeoksam      outofSeoul  low(Q1)   2020 7              100   Professional     0
:)   10 Yeoksam      outofSeoul  low(Q1)   2020 8               80.9 Professional     1
:)   # ℹ 22 more rows

11.2.3 special for wave2

data_card_wave2_edited <- data_card_wave2 %>%
  ungroup() %>% 
  filter(binary_type == "foods") %>%
  select(-binary_type) %>%
  group_by(W_commune_nm) %>%
  mutate(time = as.numeric(Time_ts) - 606)
data_card_wave2_edited
:)   # A tsibble: 8 x 6 [1M]
:)   # Key:       W_commune_nm [2]
:)   # Groups:    W_commune_nm [2]
:)     W_commune_nm Time_ts amount count cluster       time
:)     <fct>          <mth>  <dbl> <dbl> <fct>        <dbl>
:)   1 Myeongdong    2020 7  100   100   Financial        0
:)   2 Myeongdong    2020 8   73.9  76.8 Financial        1
:)   3 Myeongdong    2020 9   76.3  76.8 Financial        2
:)   4 Myeongdong   2020 10   83.0  82.1 Financial        3
:)   5 Yeoksam       2020 7  100   100   Professional     0
:)   6 Yeoksam       2020 8   76.9  77.5 Professional     1
:)   7 Yeoksam       2020 9   76.2  72.1 Professional     2
:)   8 Yeoksam      2020 10   79.9  77.8 Professional     3

11.2.4 figure 8

LABEL = c("2020\n Jul",  "2020\n Aug",  "2020\n Sep",  "2020\n Oct")
data_filtered_ts_lmm_inandout_colqrt <- data_filtered_ts_lmm_inandout_colqrt %>%
  mutate(col_qrt = ifelse(col_qrt == "low(Q1)", "low\n(14% to 35%)", "high\n(63% to 95%)")) 

11.2.5 only for legend in figure 8

data_card_wave2_edited %>%
  select(-count) %>%
  pivot_longer(amount, )
:)   # A tsibble: 8 x 6 [1M]
:)   # Key:       W_commune_nm, name [2]
:)   # Groups:    W_commune_nm [2]
:)     W_commune_nm Time_ts cluster       time name   value
:)     <fct>          <mth> <fct>        <dbl> <chr>  <dbl>
:)   1 Myeongdong    2020 7 Financial        0 amount 100  
:)   2 Myeongdong    2020 8 Financial        1 amount  73.9
:)   3 Myeongdong    2020 9 Financial        2 amount  76.3
:)   4 Myeongdong   2020 10 Financial        3 amount  83.0
:)   5 Yeoksam       2020 7 Professional     0 amount 100  
:)   6 Yeoksam       2020 8 Professional     1 amount  76.9
:)   7 Yeoksam       2020 9 Professional     2 amount  76.2
:)   8 Yeoksam      2020 10 Professional     3 amount  79.9



12 Clean your RAM

rm(wave_2_m4, 
   wave_4_m4, 
   wave_5_m4, 
   gg_wave_2, gg_wave_4, gg_wave_5)
rm(data_filtered_ts_lmm)
rm(list=ls(pattern = "wave"))



13 Mapping

13.1 set up

wave_2 <- c("2020 7", "2020 8", "2020 9", "2020 10") 
#wave_4 <- c("2021 6", "2021 7", "2021 8", "2021 9", "2021 10", "2021 11", "2021 12")
#wave_5 <- c("2021 12", "2022 1", "2022 2", "2022 3", "2022 4", "2022 5")

13.2 centroid only shp as a prerequisite

sf_1 <- dong.sf_commune_filtered %>%
  st_centroid() %>%
  select(name, geometry, cluster) %>%
  rename(adm_commune_nm = name) 
# st_crs(sf_1)

sf_2 <- dong.sf_resid_filtered %>%
  st_centroid() %>%
  select(adm_nm, geometry, college, col_qrt, col_qrt_withinSeoul, col_qrt_outofSeoul) %>%
  rename(adm_commune_nm = adm_nm)
# st_crs(sf_2)

dong.sf_cent_for_line <- sf_1 %>%
  bind_rows(sf_2)
dong.sf_cent_for_line
:)   Simple feature collection with 928 features and 6 fields
:)   Geometry type: POINT
:)   Dimension:     XY
:)   Bounding box:  xmin: 908000 ymin: 1890000 xmax: 1000000 ymax: 1990000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 928 × 7
:)      adm_commune_nm         geometry cluster       college col_qrt col_qrt_withinSeoul col_qrt_outofSeoul
:)    * <chr>               <POINT [m]> <fct>           <dbl> <fct>   <fct>               <fct>             
:)    1 Cheongdam      (959549 1947085) Mixed              NA <NA>    <NA>                <NA>              
:)    2 Daechi         (960758 1943106) Mixed              NA <NA>    <NA>                <NA>              
:)    3 Guro           (945735 1943823) Manufacturing      NA <NA>    <NA>                <NA>              
:)    4 Gwanghui       (956310 1951256) Manufacturing      NA <NA>    <NA>                <NA>              
:)    5 Jamsil         (964079 1945649) Mixed              NA <NA>    <NA>                <NA>              
:)    6 Jongno         (955017 1953704) Mixed              NA <NA>    <NA>                <NA>              
:)    7 Munjeong       (967652 1943251) Manufacturing      NA <NA>    <NA>                <NA>              
:)    8 Myeongdong     (953957 1951512) Financial          NA <NA>    <NA>                <NA>              
:)    9 Samseong       (961392 1945542) Professional       NA <NA>    <NA>                <NA>              
:)   10 Seocho         (956929 1943081) Professional       NA <NA>    <NA>                <NA>              
:)   # ℹ 918 more rows

13.3 wave2 flow data into linestring shp

SET_WAVE = wave_2

linestring.sf <- data_filtered_ts %>%
    filter(as.character(Time_ts) %in% SET_WAVE) %>%
    group_by(hw_link) %>%
    mutate(ROC = flow_corrected / first(flow_corrected) * 100) %>%
    mutate(ROCC = ROC - first(ROC)) %>%
    relocate(H_adm_nm, W_commune_nm, ROC, ROCC, flow_corrected) %>%
    filter(Time == 8) %>%
    tibble() %>%
    ungroup() %>%
    relocate(H_adm_nm, W_commune_nm, ROCC)
linestring.sf
:)   # A tibble: 10,098 × 25
:)      H_adm_nm       W_commune_nm   ROCC   ROC flow_corrected Time_ts  flow distance cluster       college H_adm_cd_shp  Time hw_link                       new flow_mean  adm_cd col_qrt  col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol
:)      <chr>          <chr>         <dbl> <dbl>          <dbl>   <mth> <dbl>    <dbl> <fct>           <dbl> <chr>        <dbl> <chr>                       <dbl>     <dbl>   <dbl> <fct>    <fct>               <chr>                  <dbl>       <dbl>        <dbl>        <dbl>             <dbl>            <dbl>
:)    1 강남구_개포1동 Cheongdam    -19.6   80.4          1622.  2020 8 1581.     17.7 Mixed           0.889 1123068          8 강남구_개포1동 -> Cheongdam  2267     1583. 1123068 high(Q4) high(Q4)            <NA>                  155890       212.          27.6         38.8                20               19
:)    2 강남구_개포1동 Daechi       -17.3   82.7          2962.  2020 8 2886.     14.9 Mixed           0.889 1123068          8 강남구_개포1동 -> Daechi     2267     3098. 1123068 high(Q4) high(Q4)            <NA>                  116633       116.          29.2         31.3                20               19
:)    3 강남구_개포1동 Guro         -16.1   83.9           397.  2020 8  387.     29.3 Manufacturing   0.889 1123068          8 강남구_개포1동 -> Guro       2267      322. 1123068 high(Q4) high(Q4)            <NA>                  336770       304.          36.3         25.9                20               19
:)    4 강남구_개포1동 Gwanghui      -8.54  91.5           249.  2020 8  242.     25.1 Manufacturing   0.889 1123068          8 강남구_개포1동 -> Gwanghui   2267      356. 1123068 high(Q4) high(Q4)            <NA>                  159615       239.          14.8         44.7                20               19
:)    5 강남구_개포1동 Jamsil         5.34 105.            681.  2020 8  664.     18.1 Mixed           0.889 1123068          8 강남구_개포1동 -> Jamsil     2267      615. 1123068 high(Q4) high(Q4)            <NA>                  123030       102.          27.1         40.1                20               19
:)    6 강남구_개포1동 Jongno       -46.8   53.2           500.  2020 8  487.     25.2 Mixed           0.889 1123068          8 강남구_개포1동 -> Jongno     2267      755. 1123068 high(Q4) high(Q4)            <NA>                  129040       203.          28.6         32.9                20               19
:)    7 강남구_개포1동 Munjeong     -38.0   62.0           358.  2020 8  349.     20.9 Manufacturing   0.889 1123068          8 강남구_개포1동 -> Munjeong   2267      549. 1123068 high(Q4) high(Q4)            <NA>                  110472        98.5         24.9         32.7                20               19
:)    8 강남구_개포1동 Myeongdong   -45.2   54.8           666.  2020 8  649.     25.8 Financial       0.889 1123068          8 강남구_개포1동 -> Myeong     2267      965. 1123068 high(Q4) high(Q4)            <NA>                  217113       773.          40.1         29.7                20               19
:)    9 강남구_개포1동 Samseong     -21.5   78.5          1172.  2020 8 1142.     16.9 Professional    0.889 1123068          8 강남구_개포1동 -> Samseong   2267     1255. 1123068 high(Q4) high(Q4)            <NA>                  120988       320.          31.0         37.8                20               19
:)   10 강남구_개포1동 Seocho       -26.8   73.2           640.  2020 8  623.     17.9 Professional    0.889 1123068          8 강남구_개포1동 -> Seocho     2267      791. 1123068 high(Q4) high(Q4)            <NA>                  103264       236.          35.7         25.2                20               19
:)   # ℹ 10,088 more rows
linestring.sf <- linestring.sf %>%
    od::od_to_sf(dong.sf_cent_for_line)
linestring.sf
:)   Simple feature collection with 10098 features and 25 fields
:)   Geometry type: LINESTRING
:)   Dimension:     XY
:)   Bounding box:  xmin: 908000 ymin: 1890000 xmax: 1000000 ymax: 1990000
:)   Projected CRS: Korea 2000 / Unified CS
:)   First 10 features:
:)            H_adm_nm W_commune_nm   ROCC   ROC flow_corrected Time_ts flow distance       cluster college H_adm_cd_shp Time                     hw_link  new flow_mean  adm_cd  col_qrt col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol                       geometry
:)   1  강남구_개포1동    Cheongdam -19.59  80.4           1622  2020 8 1581     17.7         Mixed   0.889      1123068    8 강남구_개포1동 -> Cheongdam 2267      1583 1123068 high(Q4)            high(Q4)               <NA>    155890       211.5         27.6         38.8                20               19 LINESTRING (961302 1942151,...
:)   2  강남구_개포1동       Daechi -17.32  82.7           2962  2020 8 2886     14.9         Mixed   0.889      1123068    8    강남구_개포1동 -> Daechi 2267      3098 1123068 high(Q4)            high(Q4)               <NA>    116633       116.2         29.2         31.3                20               19 LINESTRING (961302 1942151,...
:)   3  강남구_개포1동         Guro -16.10  83.9            397  2020 8  387     29.3 Manufacturing   0.889      1123068    8      강남구_개포1동 -> Guro 2267       322 1123068 high(Q4)            high(Q4)               <NA>    336770       303.6         36.3         25.9                20               19 LINESTRING (961302 1942151,...
:)   4  강남구_개포1동     Gwanghui  -8.54  91.5            249  2020 8  242     25.1 Manufacturing   0.889      1123068    8  강남구_개포1동 -> Gwanghui 2267       356 1123068 high(Q4)            high(Q4)               <NA>    159615       238.6         14.8         44.7                20               19 LINESTRING (961302 1942151,...
:)   5  강남구_개포1동       Jamsil   5.34 105.3            681  2020 8  664     18.1         Mixed   0.889      1123068    8    강남구_개포1동 -> Jamsil 2267       615 1123068 high(Q4)            high(Q4)               <NA>    123030       101.7         27.1         40.1                20               19 LINESTRING (961302 1942151,...
:)   6  강남구_개포1동       Jongno -46.79  53.2            500  2020 8  487     25.2         Mixed   0.889      1123068    8    강남구_개포1동 -> Jongno 2267       755 1123068 high(Q4)            high(Q4)               <NA>    129040       203.3         28.6         32.9                20               19 LINESTRING (961302 1942151,...
:)   7  강남구_개포1동     Munjeong -37.97  62.0            358  2020 8  349     20.9 Manufacturing   0.889      1123068    8  강남구_개포1동 -> Munjeong 2267       549 1123068 high(Q4)            high(Q4)               <NA>    110472        98.5         24.9         32.7                20               19 LINESTRING (961302 1942151,...
:)   8  강남구_개포1동   Myeongdong -45.16  54.8            666  2020 8  649     25.8     Financial   0.889      1123068    8    강남구_개포1동 -> Myeong 2267       965 1123068 high(Q4)            high(Q4)               <NA>    217113       772.9         40.1         29.7                20               19 LINESTRING (961302 1942151,...
:)   9  강남구_개포1동     Samseong -21.52  78.5           1172  2020 8 1142     16.9  Professional   0.889      1123068    8  강남구_개포1동 -> Samseong 2267      1255 1123068 high(Q4)            high(Q4)               <NA>    120988       320.1         31.0         37.8                20               19 LINESTRING (961302 1942151,...
:)   10 강남구_개포1동       Seocho -26.77  73.2            640  2020 8  623     17.9  Professional   0.889      1123068    8    강남구_개포1동 -> Seocho 2267       791 1123068 high(Q4)            high(Q4)               <NA>    103264       235.9         35.7         25.2                20               19 LINESTRING (961302 1942151,...

13.4 By clusters

13.4.1 Professional

CLUST = "Professional"
THRESHOLD = 0.2
:)   # A tibble: 2 × 2
:)     RCC_quart mean_ROCC
:)     <fct>         <dbl>
:)   1 largest      -31.7 
:)   2 smallest      -7.51
a <- linestring.sf %>%
  filter(H_adm_nm != "중구_운서동") %>%
  filter(flow_corrected > quantile(flow_corrected, THRESHOLD)) %>%    
  filter(as.character(cluster) == CLUST)
summary(a$ROCC)
:)      Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
:)     -56.5   -27.1   -20.7   -19.9   -13.3    29.5
flow.sf %>%
  filter(str_detect(H_adm_nm, "비전"))
:)   Simple feature collection with 1 feature and 26 fields
:)   Geometry type: LINESTRING
:)   Dimension:     XY
:)   Bounding box:  xmin: 957000 ymin: 1890000 xmax: 966000 ymax: 1940000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 1 × 27
:)     H_adm_nm       W_commune_nm  ROCC   ROC flow_corrected Time_ts  flow distance cluster      college H_adm_cd_shp  Time hw_link                    new flow_mean  adm_cd col_qrt        col_qrt_withinSeoul col_qrt_outofSeoul total_emp density_emp share_of_KBI share_of_CSI n_weekdays_wo_hol n_weekdays_w_hol                         geometry RCC_quart
:)   * <chr>          <chr>        <dbl> <dbl>          <dbl>   <mth> <dbl>    <dbl> <fct>          <dbl> <chr>        <dbl> <chr>                    <dbl>     <dbl>   <dbl> <fct>          <fct>               <chr>                  <dbl>       <dbl>        <dbl>        <dbl>             <dbl>            <dbl>                 <LINESTRING [m]> <fct>    
:)   1 평택시_비전2동 Seocho        6.65  107.           505.  2020 8  492.     40.7 Professional   0.465 3107065          8 평택시_비전2동 -> Seocho  2267      447. 3107065 middle-low(Q2) <NA>                middle-high(Q3)       103264        236.         35.7         25.2                20               19 (966170 1888086, 956929 1943081) smallest
flow.sf <- flow.sf %>%
  filter(!str_detect(H_adm_nm, "비전"))
bounding_box <- st_bbox(flow.sf)

a.sf <- dong.sf_commune_filtered %>%
  select(name, geometry, cluster) %>%
  rename(adm_commune_nm = name) %>%
  bind_rows(sf_2) %>%
  filter(as.character(cluster) == CLUST) 

b_1 <- flow.sf %>% ungroup() %>% pull(H_adm_nm)
b_2 <- flow.sf %>% st_drop_geometry() %>% select(H_adm_nm, ROCC, RCC_quart)
b.sf <- dong.sf_cent_for_line %>%
  filter(adm_commune_nm %in% b_1) %>%
  left_join(b_2, by = c("adm_commune_nm" = "H_adm_nm")) %>%
  select(adm_commune_nm, geometry, RCC_quart) %>%
  distinct(adm_commune_nm, geometry, RCC_quart)
rm(b_1, b_2)
b.sf
:)   Simple feature collection with 671 features and 2 fields
:)   Geometry type: POINT
:)   Dimension:     XY
:)   Bounding box:  xmin: 917000 ymin: 1900000 xmax: 990000 ymax: 1980000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 671 × 3
:)      adm_commune_nm                 geometry RCC_quart
:)      <chr>                       <POINT [m]> <fct>    
:)    1 종로구_사직동          (953233 1952856) smallest 
:)    2 종로구_사직동          (953233 1952856) largest  
:)    3 종로구_부암동          (952644 1955419) smallest 
:)    4 종로구_평창동          (953060 1957494) largest  
:)    5 종로구_무악동          (952193 1953219) largest  
:)    6 종로구_교남동          (952678 1952545) smallest 
:)    7 종로구_종로1.2.3.4가동 (954954 1952990) largest  
:)    8 종로구_이화동          (956119 1953487) largest  
:)    9 종로구_창신2동         (956749 1953074) largest  
:)   10 종로구_숭인2동         (957645 1952899) smallest 
:)   # ℹ 661 more rows
s_1 <- b.sf %>%
  st_drop_geometry() %>%
  select(adm_commune_nm, RCC_quart)

s_2 <- dong.sf_resid_filtered %>%
  st_drop_geometry() %>%
  select(adm_cd, adm_nm, col_qrt) %>%
  mutate(inout = ifelse(str_sub(adm_cd, 1, 2) == "11", "within", "out of")) %>%
  mutate(inout = factor(inout))   

s_prof <- s_1 %>%
  left_join(s_2, by = c("adm_commune_nm" = "adm_nm"))
s_prof
:)   # A tibble: 671 × 5
:)      adm_commune_nm         RCC_quart adm_cd  col_qrt        inout 
:)      <chr>                  <fct>     <chr>   <fct>          <fct> 
:)    1 종로구_사직동          smallest  1101053 high(Q4)       within
:)    2 종로구_사직동          largest   1101053 high(Q4)       within
:)    3 종로구_부암동          smallest  1101055 high(Q4)       within
:)    4 종로구_평창동          largest   1101056 high(Q4)       within
:)    5 종로구_무악동          largest   1101057 high(Q4)       within
:)    6 종로구_교남동          smallest  1101058 high(Q4)       within
:)    7 종로구_종로1.2.3.4가동 largest   1101061 high(Q4)       within
:)    8 종로구_이화동          largest   1101064 high(Q4)       within
:)    9 종로구_창신2동         largest   1101068 low(Q1)        within
:)   10 종로구_숭인2동         smallest  1101071 middle-low(Q2) within
:)   # ℹ 661 more rows
rm(s_1, s_2)

13.4.2 Financial

CLUST = "Financial"
THRESHOLD = 0.2
:)   # A tibble: 2 × 2
:)     RCC_quart mean_ROCC
:)     <fct>         <dbl>
:)   1 largest       -39.1
:)   2 smallest      -16.1
a <- linestring.sf %>%
  filter(H_adm_nm != "중구_운서동") %>%
  filter(flow_corrected > quantile(flow_corrected, THRESHOLD)) %>%    
  filter(as.character(cluster) == CLUST)
summary(a$ROCC)
:)      Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
:)     -59.2   -36.6   -29.2   -28.6   -21.7    32.7
bounding_box <- st_bbox(flow.sf)

a.sf <- dong.sf_commune_filtered %>%
  select(name, geometry, cluster) %>%
  rename(adm_commune_nm = name) %>%
  bind_rows(sf_2) %>%
  filter(as.character(cluster) == CLUST) 

b_1 <- flow.sf %>% ungroup() %>% pull(H_adm_nm)
b_2 <- flow.sf %>% st_drop_geometry() %>% select(H_adm_nm, ROCC, RCC_quart)
b.sf <- dong.sf_cent_for_line %>%
  filter(adm_commune_nm %in% b_1) %>%
  left_join(b_2, by = c("adm_commune_nm" = "H_adm_nm")) %>%
  select(adm_commune_nm, geometry, RCC_quart) %>%
  distinct(adm_commune_nm, geometry, RCC_quart)
rm(b_1, b_2)
b.sf
:)   Simple feature collection with 516 features and 2 fields
:)   Geometry type: POINT
:)   Dimension:     XY
:)   Bounding box:  xmin: 917000 ymin: 1910000 xmax: 984000 ymax: 1980000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 516 × 3
:)      adm_commune_nm         geometry RCC_quart
:)      <chr>               <POINT [m]> <fct>    
:)    1 종로구_부암동  (952644 1955419) largest  
:)    2 종로구_평창동  (953060 1957494) largest  
:)    3 종로구_교남동  (952678 1952545) largest  
:)    4 종로구_가회동  (954666 1953805) largest  
:)    5 종로구_이화동  (956119 1953487) smallest 
:)    6 종로구_창신2동 (956749 1953074) smallest 
:)    7 종로구_창신3동 (957019 1953397) smallest 
:)    8 종로구_숭인1동 (957342 1953199) largest  
:)    9 중구_회현동    (953816 1950972) smallest 
:)   10 중구_명동      (954456 1951746) smallest 
:)   # ℹ 506 more rows
b.sf %>%
  group_by(RCC_quart) %>%
  count()
:)   Simple feature collection with 2 features and 2 fields
:)   Geometry type: MULTIPOINT
:)   Dimension:     XY
:)   Bounding box:  xmin: 917000 ymin: 1910000 xmax: 984000 ymax: 1980000
:)   Projected CRS: Korea 2000 / Unified CS
:)   # A tibble: 2 × 3
:)     RCC_quart     n                                                                                  geometry
:)   * <fct>     <int>                                                                          <MULTIPOINT [m]>
:)   1 largest     254 ((922798 1960611), (923895 1933876), (924095 1948593), (924943 1929695), (926189 19612...
:)   2 smallest    262 ((917380 1944112), (925381 1948282), (925938 1941909), (926381 1952180), (926532 19406...
s_1 <- b.sf %>%
  st_drop_geometry() %>%
  select(adm_commune_nm, RCC_quart)

s_2 <- dong.sf_resid_filtered %>%
  st_drop_geometry() %>%
  select(adm_cd, adm_nm, col_qrt) %>%
  mutate(inout = ifelse(str_sub(adm_cd, 1, 2) == "11", "within", "out of")) %>%
  mutate(inout = factor(inout))   

s_finan <- s_1 %>%
  left_join(s_2, by = c("adm_commune_nm" = "adm_nm"))
s_finan
:)   # A tibble: 516 × 5
:)      adm_commune_nm RCC_quart adm_cd  col_qrt         inout 
:)      <chr>          <fct>     <chr>   <fct>           <fct> 
:)    1 종로구_부암동  largest   1101055 high(Q4)        within
:)    2 종로구_평창동  largest   1101056 high(Q4)        within
:)    3 종로구_교남동  largest   1101058 high(Q4)        within
:)    4 종로구_가회동  largest   1101060 middle-high(Q3) within
:)    5 종로구_이화동  smallest  1101064 high(Q4)        within
:)    6 종로구_창신2동 smallest  1101068 low(Q1)         within
:)    7 종로구_창신3동 smallest  1101069 middle-high(Q3) within
:)    8 종로구_숭인1동 largest   1101070 middle-low(Q2)  within
:)    9 중구_회현동    smallest  1102054 high(Q4)        within
:)   10 중구_명동      smallest  1102055 high(Q4)        within
:)   # ℹ 506 more rows
rm(s_1, s_2)

13.4.3 overall stat

s_prof <- s_prof %>% 
  mutate(clus = "prof")
s_finan <- s_finan %>% 
  mutate(clus = "finan")
s_prof_finan <- s_prof %>%
  bind_rows(s_finan) %>%
  mutate(inout = case_when(as.character(inout) == "out of" ~ "out of Seoul",
                           as.character(inout) == "within" ~ "within Seoul")) %>%
  mutate(clus = case_when(as.character(clus) == "prof" ~ "Professional",
                           as.character(clus) == "finan" ~ "Financial")) %>%
  mutate(clus = factor(clus, levels = c("Professional", "Financial")))
s_prof_finan 
:)   # A tibble: 1,187 × 6
:)      adm_commune_nm         RCC_quart adm_cd  col_qrt        inout        clus        
:)      <chr>                  <fct>     <chr>   <fct>          <chr>        <fct>       
:)    1 종로구_사직동          smallest  1101053 high(Q4)       within Seoul Professional
:)    2 종로구_사직동          largest   1101053 high(Q4)       within Seoul Professional
:)    3 종로구_부암동          smallest  1101055 high(Q4)       within Seoul Professional
:)    4 종로구_평창동          largest   1101056 high(Q4)       within Seoul Professional
:)    5 종로구_무악동          largest   1101057 high(Q4)       within Seoul Professional
:)    6 종로구_교남동          smallest  1101058 high(Q4)       within Seoul Professional
:)    7 종로구_종로1.2.3.4가동 largest   1101061 high(Q4)       within Seoul Professional
:)    8 종로구_이화동          largest   1101064 high(Q4)       within Seoul Professional
:)    9 종로구_창신2동         largest   1101068 low(Q1)        within Seoul Professional
:)   10 종로구_숭인2동         smallest  1101071 middle-low(Q2) within Seoul Professional
:)   # ℹ 1,177 more rows
s_prof_finan %>%
  select(clus, RCC_quart, inout, col_qrt) %>%
  gtsummary::tbl_strata(
    strata = clus,
    .tbl_fun =
      ~ .x %>%
      tbl_strata(
        strata = RCC_quart,
        .tbl_fun =
        ~ .x %>%
        tbl_summary(by = inout),
        #.header = "**{strata} decrease**, N={n}"
        ),
    .header = "**{strata} decrease**, N={n}") %>%
  gtsummary::bold_labels()
Characteristic Professional decrease, N=671 Financial decrease, N=516
out of Seoul, N = 1871 within Seoul, N = 1541 out of Seoul, N = 1081 within Seoul, N = 2221 out of Seoul, N = 1281 within Seoul, N = 1261 out of Seoul, N = 921 within Seoul, N = 1701
col_qrt
    low(Q1) 29 (16%) 12 (7.8%) 33 (31%) 23 (10%) 8 (6.3%) 7 (5.6%) 32 (35%) 29 (17%)
    middle-low(Q2) 53 (28%) 22 (14%) 42 (39%) 55 (25%) 21 (16%) 11 (8.7%) 40 (43%) 46 (27%)
    middle-high(Q3) 53 (28%) 56 (36%) 19 (18%) 64 (29%) 47 (37%) 32 (25%) 15 (16%) 65 (38%)
    high(Q4) 52 (28%) 64 (42%) 14 (13%) 80 (36%) 52 (41%) 76 (60%) 5 (5.4%) 30 (18%)
1 n (%)
s_prof_finan %>%
  select(clus, RCC_quart, inout, col_qrt) %>%
  gtsummary::tbl_strata(strata = clus, 
                        .tbl_fun = ~ .x %>% 
                          tbl_summary(by = RCC_quart),
                        .header = "**{strata} decrease**, N={n}") %>%
  gtsummary::bold_labels()
Characteristic Professional decrease, N=671 Financial decrease, N=516
largest, N = 3411 smallest, N = 3301 largest, N = 2541 smallest, N = 2621
inout
    out of Seoul 187 (55%) 108 (33%) 128 (50%) 92 (35%)
    within Seoul 154 (45%) 222 (67%) 126 (50%) 170 (65%)
col_qrt
    low(Q1) 41 (12%) 56 (17%) 15 (5.9%) 61 (23%)
    middle-low(Q2) 75 (22%) 97 (29%) 32 (13%) 86 (33%)
    middle-high(Q3) 109 (32%) 83 (25%) 79 (31%) 80 (31%)
    high(Q4) 116 (34%) 94 (28%) 128 (50%) 35 (13%)
1 n (%)